In [1]:
import numpy as np
import pandas as pd
import pyfixest as pf
from IPython.display import display, FileLink
from tabout import TabOut,  DTable, ETable

# Demo table: six variables arranged in three row groups, with a three-level
# column header (2 x 2 x 2 = 8 columns), filled with standard-normal draws.
# A locally seeded Generator is used so that a fresh "Restart & Run All"
# reproduces exactly the same numbers without touching NumPy's global RNG state.
rng = np.random.default_rng(0)

row_index = pd.MultiIndex.from_tuples(
    [
        ("Gr 1", "Var 1"),
        ("Gr 1", "Var 2"),
        ("Gr 1", "Var 3"),
        ("Gr 2", "Var 4"),
        ("Gr 2", "Var 5"),
        ("Gr 3", "Var 6"),
    ]
)

col_index = pd.MultiIndex.from_product([["Hello", "There"], ["France", "US"], ["High", "Low"]])

# The shape is taken from the two indexes (6 rows x 8 columns) so the data
# always matches the labels if either index is edited.
values = rng.standard_normal((len(row_index), len(col_index))).round(3)
dfa = pd.DataFrame(values, index=row_index, columns=col_index)


tab = TabOut(dfa, notes="Sample very new notes!", caption="The first table", tab_label="tab_sample_1")

# Optional steps, kept disabled:
# tab.make()
# tab.update_docx(file_name="C:/PythonProjects/tabout/docs/PaperTest.docx", tab_num=1, show=False)
# pf.make_table(df=df, caption="This is table 7!", notes="These are brand new notes", type="docx",
#          file_name="C:/PythonProjects/pyfixestStore/test.docx", tab_num=7)

# Save the table as a Word document. This call stays the last expression of
# the cell so that its return value is what the notebook displays.
# NOTE(review): the target is an absolute, machine-specific path — consider a
# configurable output directory.
tab.save(type="docx", file_name="C:/PythonProjects/tabout/docs/test_tab1", replace=True)

The first table,The first table,The first table,The first table,The first table,The first table,The first table,The first table,The first table
Unnamed: 0_level_1,Hello,Hello,Hello,Hello,There,There,There,There
Unnamed: 0_level_2,France,France,US,US,France,France,US,US
Unnamed: 0_level_3,High,Low,High,Low,High,Low,High,Low
Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1
Var 1,0.135,-0.76,-0.692,2.447,-0.257,0.372,-1.134,0.68
Var 2,-0.512,0.498,1.44,0.837,1.404,-0.019,-0.008,-0.434
Var 3,1.745,0.568,0.664,-1.232,2.179,0.946,1.058,1.722
Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2
Var 4,1.163,0.624,-0.143,-0.992,-0.402,-0.128,-0.99,1.272
Var 5,0.186,-1.716,0.132,-0.747,-0.576,1.039,-0.11,-0.398
Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3
Var 6,-0.374,-0.868,-0.962,0.319,-0.096,-0.412,0.144,0.221
Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!


In [2]:
# Simulate a two-period panel of n persons used by the regression and summary
# tables further down.
#
# All random draws come from one locally seeded Generator, so re-running the
# cell (or the whole notebook on a fresh kernel) yields the same data and the
# same downstream tables, without mutating NumPy's global RNG state.
SEED = 42
rng = np.random.default_rng(SEED)

n = 500
df1 = pd.DataFrame(index=range(n))
df1['ability'] = rng.normal(100, 15, n)
df1['year'] = 1
# This is a number identifying each person in the data set
df1['persnr'] = df1.index
# Age: integers drawn from 18..59 (upper bound 60 is exclusive)
df1['age'] = rng.integers(18, 60, n)

# City cluster: assign each person to one of n_cities cities
n_cities = 20
city_names = [f'City_{i+1}' for i in range(n_cities)]
df1['city'] = rng.choice(city_names, size=n)
# One additive effect per city, applied to sales below
city_effects = {city: rng.normal(0, 2000) for city in city_names}

# Randomize three treatment groups (0, 1, 2) with equal probability
df1['tgroup'] = rng.choice([0, 1, 2], size=n, p=[1/3, 1/3, 1/3])

# Second-year observations: same persons, one year older
df2 = df1.copy()
df2['year'] = 2
df2['age'] = df2['age'] + 1

# Training only in the treatment groups and only in year 2
df1['training'] = 0
df2['training'] = (df2.tgroup > 0).astype(int)

# Stack both years: each person appears in two rows, one per year.
# Note that the row index 0..n-1 therefore occurs twice in df.
df = pd.concat([df1, df2], sort=False)

# Sales = baseline + group-specific level shifts + training effect
#         + ability effect + year effect (4000 per year) + city effect + noise
df['sales'] = (
    10000
    + (df.tgroup == 1) * 5000
    + (df.tgroup == 2) * 8000
    + df.training * 2000
    + df.ability * 100
    + df.year * 4000
    + df['city'].map(city_effects)
    + rng.normal(0, 4000, 2 * n)
)

# Display labels for variable names, passed to the table builders below
labels = {"ability": "Ability (test score)",
    "sales": "Sales",
    "year": "Year",
    "age": "Age",
    "tgroup": "Treatment Group",
    "city": "City",}

# Turn the numeric group code into an ordered categorical with readable names.
# This happens after sales are computed because the formula above compares
# tgroup against the integer codes.
df["tgroup"] = pd.Categorical(df["tgroup"], categories=[0, 1, 2], ordered=True)
df["tgroup"] = df["tgroup"].cat.rename_categories(["Control", "Treatment 1", "Treatment 2"])


In [3]:
# Single-model alternative, kept for reference:
#est=pf.feols("sales ~ ability + age + training | city", data=df)

# Multiple-estimation formula: per the table displayed below this yields four
# models — ability + age, then additionally training, each estimated without
# and with city fixed effects.
formula = "sales ~ ability + csw(age, training) | sw0(city)"
# Standard errors clustered by city (reported as "by: city" in the table)
cluster_vcov = {"CRV1": "city"}

est = pf.feols(formula, data=df, vcov=cluster_vcov)
pf.etable(est, labels=labels)

Unnamed: 0_level_0,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,(1),(2),(3),(4)
coef,coef,coef,coef,coef
Ability (test score),91.733*** (14.181),94.672*** (12.521),84.573*** (13.399),87.407*** (11.986)
Age,-12.047 (21.425),-16.529 (18.436),-14.170 (21.139),-17.513 (15.787)
training,,7671.183*** (285.133),,7748.827*** (232.928)
Intercept,2.289E+04*** (1840.156),2.010E+04*** (1637.706),,
fe,fe,fe,fe,fe
City,-,-,x,x
stats,stats,stats,stats,stats
Observations,1000,1000,1000,1000
S.E. type,by: city,by: city,by: city,by: city


In [15]:
# Same fitted models as above, rendered with tabout's ETable and an explicit
# choice of model statistics to report.
et1 = ETable(
    est,
    labels=labels,
    model_stats=["N", "r2", "r2_within"],
)
et1

Unnamed: 0_level_0,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,(1),(2),(3),(4)
coef,coef,coef,coef,coef
Ability (test score),91.733*** (14.181),94.672*** (12.521),84.573*** (13.399),87.407*** (11.986)
Age,-12.047 (21.425),-16.529 (18.436),-14.170 (21.139),-17.513 (15.787)
training,,7671.183*** (285.133),,7748.827*** (232.928)
Intercept,2.289E+04*** (1840.156),2.010E+04*** (1637.706),,
fe,fe,fe,fe,fe
City,-,-,x,x
stats,stats,stats,stats,stats
Observations,1000,1000,1000,1000
R2,0.042,0.354,0.167,0.482


In [5]:
import statsmodels.formula.api as smf

# Fit the full specification with statsmodels OLS and render the fitted
# result with ETable.
ols_model = smf.ols("sales ~ ability + age + training", data=df)
reg = ols_model.fit()

et2 = ETable(
    reg,
    labels=labels,
    model_stats=["N", "r2", "adj_r2", "se_type"],
)
et2

Unnamed: 0_level_0,Sales
Unnamed: 0_level_1,(1)
coef,coef
Ability (test score),94.672*** (11.433)
Age,-16.529 (13.775)
training,7671.183*** (350.050)
Intercept,2.010E+04*** (1274.713)
stats,stats
Observations,1000
R2,0.354
Adj. R2,0.352
S.E. type,nonrobust


In [6]:
# Display the ETable built from the statsmodels fit once more; nothing is recomputed here.
et2

Unnamed: 0_level_0,Sales
Unnamed: 0_level_1,(1)
coef,coef
Ability (test score),94.672*** (11.433)
Age,-16.529 (13.775)
training,7671.183*** (350.050)
Intercept,2.010E+04*** (1274.713)
stats,stats
Observations,1000
R2,0.354
Adj. R2,0.352
S.E. type,nonrobust


In [7]:
# Descriptive table of ability and age with one column per treatment group.
# Per the rendered output, each cell shows mean (std. dev.) and the group
# sizes are listed in a row below the statistics.
dt1 = DTable(
    df,
    vars=["ability", "age"],
    bycol=["tgroup"],
    stats=["mean_newline_std"],
    counts_row_below=True,
    hide_stats=True,
    labels=labels,
)
dt1

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),99.18 (15.11),99.55 (14.58),97.28 (14.08)
Age,39.47 (11.74),38.04 (12.25),39.22 (12.28)
nobs,nobs,nobs,nobs
N,304,340,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).


In [8]:
# Export the descriptive table with type "tex"; the call is the last
# expression of the cell, so whatever it returns is displayed.
dt1.save(
    type="tex",
    file_name="C:/PythonProjects/tabout/docs/test_tab2",
    replace=True,
)

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),99.18 (15.11),99.55 (14.58),97.28 (14.08)
Age,39.47 (11.74),38.04 (12.25),39.22 (12.28)
nobs,nobs,nobs,nobs
N,304,340,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).


In [9]:
# Update the Word document with the descriptive table.
# NOTE(review): presumably tab_num=1 selects the first table in the document
# and show=False suppresses displaying the result — confirm against tabout.
dt1.update_docx(
    file_name="C:/PythonProjects/tabout/docs/PaperTest.docx",
    tab_num=1,
    show=False,
)

In [10]:
# Build the group-wise descriptive table again under the name `dt` and write
# it into the Word document, this time with show=True.
summary_options = dict(
    vars=["ability", "age"],
    bycol=["tgroup"],
    stats=["mean_newline_std"],
    counts_row_below=True,
    labels=labels,
    hide_stats=True,
)
dt = DTable(df, **summary_options)

dt.update_docx(
    file_name="C:/PythonProjects/tabout/docs/PaperTest.docx",
    tab_num=1,
    show=True,
)

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),99.18 (15.11),99.55 (14.58),97.28 (14.08)
Age,39.47 (11.74),38.04 (12.25),39.22 (12.28)
nobs,nobs,nobs,nobs
N,304,340,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).


In [11]:
# # Create a simple index dataframe with random data
# simple_index = ['Var 1', 'Var 2', 'Var 3', 'Var 4', 'Var 5', 'Var 6']
# simple_col_index = ['Hello', 'There', 'France', 'US', 'High', 'Low', 'Extra1', 'Extra2']
# df = pd.DataFrame(np.random.randn(6, 8).round(3), index=simple_index, columns=simple_col_index)
# tab_out = TabOut(df, notes="Sample notes", caption="Sample Table 1", tab_label="tab_sample")

# # Generate tables in different formats
# ##tab_out.make(types=["gt", "docx"], path="C:/PythonProjects/pyregtable/")

