In [1]:
#| echo: false
#| include: false
#| eval: true

# Import necessary libraries
import numpy as np
import pandas as pd
import pyfixest as pf
from IPython.display import DisplayObject, display
import tabout as to
from tabout.mtable import MTable

# Class-level defaults on tabout's MTable, set once here for the tables rendered
# below. NOTE(review): presumably these control the full-width layout and the
# width of the first (label) column in LaTeX output — confirm in the tabout docs.
to.MTable.DEFAULT_FULL_WIDTH = True
to.MTable.DEFAULT_FIRST_COL_WIDTH = r"0.25\linewidth"

# Seed NumPy's global RNG so that every np.random.* draw in the cells below is
# reproducible under Restart Kernel -> Run All.
np.random.seed(42)

\onehalfspacing

## Introduction
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

In [2]:
#| label: tbl-1
#| tbl-cap: "The First Table"
#| tbl-pos: "H"

# Demo table: random numbers under a two-level row index (group, variable)
# and a two-level column index (country, level).
row_index = pd.MultiIndex.from_tuples(
    [
        ("Gr 1", "Var 1"),
        ("Gr 1", "Var 2"),
        ("Gr 1", "Var 3"),
        ("Gr 2", "Var 4"),
        ("Gr 2", "Var 5"),
        ("Gr 3", "Var 6"),
    ]
)
col_index = pd.MultiIndex.from_product([["France", "US"], ["High", "Low"]])

# Use a dedicated name rather than `df`: a later cell binds `df` to the
# simulated panel data, and sharing the name would let an out-of-order re-run
# of this cell silently replace the data the later tables are built from.
demo_table = pd.DataFrame(
    np.random.randn(6, 4).round(3),  # 6 rows x 4 columns, matching the two indexes
    index=row_index,
    columns=col_index,
)

# Last expression of the cell -> rendered as the table output.
to.MTable(demo_table, notes="Sample notes!")


Unnamed: 0_level_0,France,France,US,US
Unnamed: 0_level_1,High,Low,High,Low
Gr 1,Gr 1,Gr 1,Gr 1,Gr 1
Var 1,-1.407,2.0,-1.203,-0.843
Var 2,-3.611,0.658,-0.944,-1.441
Var 3,1.861,0.663,0.007,1.145
Gr 2,Gr 2,Gr 2,Gr 2,Gr 2
Var 4,1.003,-0.819,0.352,-0.022
Var 5,-0.405,0.033,-1.0,-0.44
Gr 3,Gr 3,Gr 3,Gr 3,Gr 3
Var 6,-1.426,0.919,-1.724,-0.415
Sample notes!,Sample notes!,Sample notes!,Sample notes!,Sample notes!




In [3]:
# Simulate a two-period panel of n persons with a randomized training
# treatment, city-specific effects and a sales outcome.
n = 500  # number of persons; each person gets one row per year

# --- Year 1 cross-section: one row per person -------------------------------
df1 = pd.DataFrame(index=range(n))
df1["ability"] = np.random.normal(100, 15, n)
df1["year"] = 1
# Person identifier, taken from the 0..n-1 index
df1["persnr"] = df1.index
# Age in years; randint's upper bound is exclusive, so ages are 18..59
df1["age"] = np.random.randint(18, 60, n)
# City cluster: assign each person to one of 20 cities
n_cities = 20
city_names = [f"City_{i+1}" for i in range(n_cities)]
df1["city"] = np.random.choice(city_names, size=n)
# One additive sales effect per city
city_effects = {city: np.random.normal(0, 2000) for city in city_names}
# Randomize three equally likely treatment groups (0, 1, 2)
df1["tgroup"] = np.random.choice([0, 1, 2], size=n, p=[1/3, 1/3, 1/3])

# --- Year 2: same persons, one year older -----------------------------------
df2 = df1.copy()
df2["year"] = 2
df2["age"] = df2["age"] + 1

# Training happens only in the treatment groups (tgroup > 0) and only in year 2
df1["training"] = 0
df2["training"] = (df2.tgroup > 0).astype(int)

# --- Stack both years: two rows per person ----------------------------------
# ignore_index=True gives the stacked frame a fresh unique 0..2n-1 index.
# Without it every index label would appear twice, which makes any later
# index-aligned operation fragile. The person id is preserved in `persnr`.
df = pd.concat([df1, df2], sort=False, ignore_index=True)

# Sales = baseline + group level differences + training effect + ability
#         + year effect (4000 Euro more in year 2 than in year 1)
#         + city-specific effect + idiosyncratic noise.
# Must run while `tgroup` still holds the integer codes 0/1/2.
df["sales"] = (
    10000
    + (df.tgroup == 1) * 5000
    + (df.tgroup == 2) * 8000
    + df.training * 2000
    + df.ability * 100
    + df.year * 4000
    + df["city"].map(city_effects)
    + np.random.normal(0, 4000, 2 * n)
)

# Display labels for the tables; "training" is included so the regression
# table does not fall back to the raw column name.
labels = {
    "ability": "Ability (test score)",
    "sales": "Sales (Euro)",
    "year": "Year",
    "age": "Age (years)",
    "tgroup": "Treatment Group",
    "training": "Training",
    "city": "City",
}

# Turn the integer group codes into an ordered categorical with readable names
df["tgroup"] = pd.Categorical(
    df["tgroup"], categories=[0, 1, 2], ordered=True
).rename_categories(["Control", "Treatment 1", "Treatment 2"])


In [4]:
#| label: tbl-2
#| tbl-cap: "The Second Table"
#| tbl-pos: "H"
# Descriptive statistics of ability and age, one column per treatment group,
# with a row of group counts underneath.
to.DTable(
    df,
    vars=["ability", "age"],
    bycol=["tgroup"],
    stats=["mean_newline_std"],
    counts_row_below=True,
    hide_stats=True,
    labels=labels,
)

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),99.78 (15.13),99.34 (15.38),97.47 (17.09)
Age (years),39.12 (12.40),38.53 (12.46),38.62 (12.36)
nobs,nobs,nobs,nobs
N,320,362,318
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).




In [5]:
#| label: tbl-3
#| tbl-cap: "The Third Table"
#| tbl-pos: "H"
# Regress sales on ability. csw() adds its arguments cumulatively, so this
# yields two models: (1) with age, (2) with age and training.
est = pf.feols("sales ~ ability + csw(age, training)", data=df)
# Last expression of the cell -> rendered as the regression table.
to.ETable(est, labels=labels)

Unnamed: 0_level_0,Sales (Euro),Sales (Euro)
Unnamed: 0_level_1,(1),(2)
coef,coef,coef
Ability (test score),66.359*** (12.840),71.335*** (10.007)
Age (years),1.909 (16.448),-4.368 (12.819)
training,,8521.194*** (335.259)
Intercept,2.391E+04*** (1421.734),2.077E+04*** (1114.735)
stats,stats,stats
Observations,1000,1000
R²,0.026,0.409
"Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"




In [6]:
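# NOTE(review): disabled example that would download a Stata data set over
# HTTPS. If re-enabled, it rebinds `df`, replacing the simulated panel data
# used by the tables above; give it a separate name or remove this cell.
# df = to.import_dta("https://www.stata-press.com/data/r18/auto.dta")
# to.ETable(pf.feols("mpg ~ csw(weight, length)", data=df))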

## Conclusion

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

{{< pagebreak >}}

## References

::: {#refs}
:::