In [None]:
import numpy as np
import pandas as pd
import pyfixest as pf
from IPython.display import display, FileLink
from tabout import MTable,  DTable, ETable, BTable
from IPython.display import FileLink
import pylatex as pl 

# Use pylatex to create a tex file with the table
def make_pdf(tab_or_tabs, file):
    """
    Create a PDF document with one or more LaTeX tables.

    Parameters
    ----------
    tab_or_tabs : str or list/tuple of str
        LaTeX source of a single table, or a list/tuple of such sources.
        Each entry is inserted unescaped into its own floating ``table``
        environment, in the order given.
    file : str
        Output path handed to ``Document.generate_pdf``. The notebook passes
        it without an extension and then links to ``<file>.pdf``.

    Returns
    -------
    None
        The document is written to disk as a side effect.
    """
    doc = pl.Document()
    # LaTeX packages loaded in the preamble for the generated table code.
    for package_name in ("booktabs", "threeparttable", "makecell", "tabularx", "array"):
        doc.packages.append(pl.Package(package_name))

    # Normalise the input: a list or tuple is iterated element by element,
    # anything else (a single table string) is wrapped into a one-item list.
    if isinstance(tab_or_tabs, (list, tuple)):
        tables = list(tab_or_tabs)
    else:
        tables = [tab_or_tabs]

    with doc.create(pl.Section("PyFixest LaTeX Tables")):
        for tab in tables:
            # One float per table; NoEscape keeps the LaTeX markup intact.
            with doc.create(pl.Table(position="htbp")) as table:
                table.append(pl.NoEscape(tab))

    # clean_tex=False is passed so the intermediate .tex file is not removed.
    doc.generate_pdf(file, clean_tex=False)

# Two-level (group, variable) row index.
# NOTE(review): row_index is not passed to dfa below, which uses a flat
# index of the variable names only - confirm whether index=row_index was intended.
row_index = pd.MultiIndex.from_tuples(
    [
        ("Gr 1", "Var 1"),
        ("Gr 1", "Var 2"),
        ("Gr 1", "Var 3"),
        ("Gr 2", "Var 4"),
        ("Gr 2", "Var 5"),
        ("Gr 3", "Var 6"),
    ]
)

# Seeded generator so the demo table (and therefore the PDF) is identical
# on every Restart & Run All.
demo_rng = np.random.default_rng(0)
dfa = pd.DataFrame(
    demo_rng.standard_normal((6, 2)).round(2),
    index=["Var 1", "Var 2", "Var 3", "Var 4", "Var 5", "Var 6"],
    columns=["Hello", "There"],
)

# Render the same table once per tab_width setting.
tab_widths = ["linewidth", "10cm", r"0.8\textwidth"]
tabs = [MTable(dfa).make(type="tex", tab_width=width) for width in tab_widths]

# Compile latex to pdf & display a button with the hyperlink to the pdf
make_pdf(tabs, "docs/SampleTableDoc2")
display(FileLink("docs/SampleTableDoc2.pdf"))


In [2]:
# Simulate a two-year panel: n persons, each observed in year 1 and year 2.
n = 500
# Dedicated seeded generator: without it every run draws different data and
# all downstream tables change between executions.
sim_rng = np.random.default_rng(42)

df1 = pd.DataFrame(index=range(n))
df1['ability'] = sim_rng.normal(100, 15, n)
df1['year'] = 1
# This is a number identifying each person in the data set
df1['persnr'] = df1.index
# Add age (upper bound 60 is exclusive)
df1['age'] = sim_rng.integers(18, 60, n)
# Add city cluster: assign each person to a city (e.g., 20 cities)
n_cities = 20
city_names = [f'City_{i+1}' for i in range(n_cities)]
df1['city'] = sim_rng.choice(city_names, size=n)
# Generate city-specific effects
city_effects = {city: sim_rng.normal(0, 2000) for city in city_names}
# Randomize three treatment groups (0, 1, 2)
df1['tgroup'] = sim_rng.choice([0, 1, 2], size=n, p=[1/3, 1/3, 1/3])

# Copy the DataFrame to create second year observations stored in df2
df2 = df1.copy()
df2['year'] = 2
df2['age'] = df2['age'] + 1

# Training only in the treatment groups and only in year 2:
df1['training'] = 0
df2['training'] = (df2.tgroup > 0).astype(int)

# Generate DataFrame that spans both years combining the two:
# Note for each person we have two rows, one for each year.
# ignore_index=True gives the stacked frame a unique 0..2n-1 index instead of
# repeating the labels 0..n-1; the person identifier is kept in 'persnr'.
df = pd.concat([df1, df2], sort=False, ignore_index=True)

# Generate sales with
# a year effect: sales increase by 4000 Euro between years 1 and 2,
# different treatment effects for each group,
# and a city-specific effect
df['sales'] = (
    10000
    + (df.tgroup == 1) * 5000
    + (df.tgroup == 2) * 8000
    + df.training * 2000
    + df.ability * 100
    + df.year * 4000
    + df['city'].map(city_effects)
    + sim_rng.normal(0, 4000, 2 * n)
)

# Display labels for the variables, passed to the table constructors below.
labels = {
    "ability": "Ability (test score)",
    "sales": "Sales",
    "year": "Year",
    "age": "Age",
    "tgroup": "Treatment Group",
    "city": "City",
}

# Turn the numeric group code into an ordered categorical with readable names
# (done after 'sales' is computed, which compares tgroup against the codes).
df["tgroup"] = pd.Categorical(df["tgroup"], categories=[0, 1, 2], ordered=True)
df["tgroup"] = df["tgroup"].cat.rename_categories(["Control", "Treatment 1", "Treatment 2"])


In [3]:
# Balance table for the year-1 rows: "mean" and "std" of ability and age,
# grouped by treatment group.
tab2 = BTable(
    df[df.year == 1],
    vars=["ability", "age"],
    group="tgroup",
    labels=labels,
    stats=["mean", "std"],
    digits=2,
    caption="Balance Table for Year 1",
    tab_label="tab_2",
)
# Relative path (same docs/ folder the PDF cell writes to) instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# Assumes the working directory is the repository root - TODO confirm.
tab2.update_docx(file_name="docs/PaperTest1.docx", tab_num=2, show=False)

In [4]:
# Sales regressions with year and city fixed effects; csw(age, training) is
# pyfixest's cumulative-stepwise syntax, so several models are estimated at once.
est = pf.feols("sales ~ ability + csw(age, training) | year + city", data=df)
tab3 = ETable(
    est,
    labels=labels,
    model_stats=["N", "r2", "r2_within"],
    caption="Regression Results",
    tab_label="tab_3",
)
# Relative path (same docs/ folder the PDF cell writes to) instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# Assumes the working directory is the repository root - TODO confirm.
tab3.update_docx(file_name="docs/PaperTest1.docx", tab_num=3, show=False)

In [5]:
import statsmodels.formula.api as smf

# Same specification without fixed effects, fitted with statsmodels OLS to
# show that ETable also accepts a statsmodels result.
reg = smf.ols("sales ~ ability + age + training", data=df).fit()
tab4 = ETable(
    reg,
    labels=labels,
    model_stats=["N", "r2", "adj_r2", "se_type"],
    caption="Regression Results Statsmodels",
    tab_label="tab_4",
)
# Relative path (same docs/ folder the PDF cell writes to) instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# Assumes the working directory is the repository root - TODO confirm.
tab4.update_docx(file_name="docs/PaperTest1.docx", tab_num=4, show=False)


In [6]:
# Descriptive statistics of sales, ability and age, with treatment groups in
# the columns and years in the rows.
tab5 = DTable(
    df,
    vars=["sales", "ability", "age"],
    bycol=["tgroup"],
    byrow="year",
    stats=["mean_newline_std"],  # presumably mean with std on a new line - verify against tabout docs
    counts_row_below=True,
    hide_stats=True,
    labels=labels,
    caption="Descriptive Statistics by Treatment Group and Year",
    tab_label="tab_5",
)
# Relative path (same docs/ folder the PDF cell writes to) instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# Assumes the working directory is the repository root - TODO confirm.
tab5.update_docx(file_name="docs/PaperTest1.docx", tab_num=5, show=False)
