In [4]:
import numpy as np
import pandas as pd
import pyfixest as pf
from IPython.display import display, FileLink
from tabout import TabOut,  DTable

# Demo frame of random draws with hierarchical labels on both axes.
# Rows are (group, variable) pairs; the groups deliberately differ in size.
group_members = {
    "Gr 1": ["Var 1", "Var 2", "Var 3"],
    "Gr 2": ["Var 4", "Var 5"],
    "Gr 3": ["Var 6"],
}
row_index = pd.MultiIndex.from_tuples(
    [(group, var) for group, members in group_members.items() for var in members]
)

# Columns are the full cross product of three two-valued levels (8 columns).
column_levels = [["Hello", "There"], ["France", "US"], ["High", "Low"]]
col_index = pd.MultiIndex.from_product(column_levels)

# One standard-normal draw per cell, rounded to three decimals.
random_values = np.random.randn(len(row_index), len(col_index)).round(3)
dfa = pd.DataFrame(random_values, index=row_index, columns=col_index)

# Wrap the frame in a TabOut object carrying the notes, caption and label.
tab = TabOut(
    dfa,
    notes="Sample very new notes!",
    caption="The first table",
    tab_label="tab_sample_1",
)

# Optional explicit build step (currently disabled):
# tab.make()

# Export the table in docx format to the docs folder.
tab.save(
    type="docx",
    file_name="C:/PythonProjects/tabout/docs/test_tab1",
    replace=True,
)

# Disabled alternatives kept for reference:
# tab.update_docx(file_name="C:/PythonProjects/tabout/docs/PaperTest.docx", tab_num=1, show=False)
#
# pf.make_table(df=df, caption="This is table 7!", notes="These are brand new notes", type="docx",
#               file_name="C:/PythonProjects/pyfixestStore/test.docx", tab_num=7)

The first table,The first table,The first table,The first table,The first table,The first table,The first table,The first table,The first table
Unnamed: 0_level_1,Hello,Hello,Hello,Hello,There,There,There,There
Unnamed: 0_level_2,France,France,US,US,France,France,US,US
Unnamed: 0_level_3,High,Low,High,Low,High,Low,High,Low
Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1,Gr 1
Var 1,0.008,-1.501,1.273,0.486,0.881,1.265,-0.39,2.837
Var 2,-0.082,-0.267,0.824,-0.483,1.141,-0.048,1.122,0.622
Var 3,0.946,0.59,0.777,0.388,1.323,-0.093,-0.046,-1.922
Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2,Gr 2
Var 4,0.88,-1.273,0.024,-2.642,-0.156,0.615,0.689,0.156
Var 5,1.76,1.433,0.473,1.33,-0.97,-0.543,0.262,0.781
Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3,Gr 3
Var 6,-1.019,0.94,1.412,-1.664,-0.216,1.517,-0.863,0.879
Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!,Sample very new notes!


In [5]:
# Simulate a two-period panel: n people observed in year 1 and again in year 2.
# Treatment groups 1 and 2 receive training in year 2 only; sales depend on
# group, training, ability, year, a city effect and idiosyncratic noise.

# A fixed seed and a local generator make every Restart & Run All produce the
# same data without touching NumPy's global random state.
SEED = 42
rng = np.random.default_rng(SEED)

n = 500
df1 = pd.DataFrame(index=range(n))
df1['ability'] = rng.normal(100, 15, n)
df1['year'] = 1
# This is a number identifying each person in the data set
df1['persnr'] = df1.index
# Add age (upper bound 60 is exclusive)
df1['age'] = rng.integers(18, 60, n)
# Add city cluster: assign each person to a city (e.g., 20 cities)
n_cities = 20
city_names = [f'City_{i+1}' for i in range(n_cities)]
df1['city'] = rng.choice(city_names, size=n)
# Generate city-specific effects (one draw per city)
city_effects = {city: rng.normal(0, 2000) for city in city_names}
# Randomize three treatment groups (0, 1, 2) with equal probability
df1['tgroup'] = rng.choice([0, 1, 2], size=n, p=[1/3, 1/3, 1/3])

# Copy the DataFrame to create second year observations stored in df2;
# everyone is one year older in year 2.
df2 = df1.copy()
df2['year'] = 2
df2['age'] = df2['age'] + 1

# Training only in the treatment groups and only in year 2:
df1['training'] = 0
df2['training'] = (df2['tgroup'] > 0).astype(int)

# Stack both years: each person contributes two rows, one per year.
# ignore_index=True gives every row a unique label; without it the labels
# 0..n-1 would appear twice, which makes label-based lookups and alignment
# ambiguous. The person identifier is still available in 'persnr'.
df = pd.concat([df1, df2], sort=False, ignore_index=True)

# Generate sales with
#   - a year effect: sales increase by 4000 Euro between years 1 and 2
#   - different treatment-group effects
#   - a city-specific effect
#   - noise drawn once per row (length taken from df, so it cannot drift
#     out of sync with the number of stacked rows)
df['sales'] = (
    10000
    + (df['tgroup'] == 1) * 5000
    + (df['tgroup'] == 2) * 8000
    + df['training'] * 2000
    + df['ability'] * 100
    + df['year'] * 4000
    + df['city'].map(city_effects)
    + rng.normal(0, 4000, len(df))
)

# Display labels for the variables, used when rendering tables.
labels = {
    "ability": "Ability (test score)",
    "sales": "Sales (Euro)",
    "year": "Year",
    "age": "Age (years)",
    "tgroup": "Treatment Group",
    "city": "City",
}

# Turn the numeric group codes into an ordered categorical with readable names.
df["tgroup"] = pd.Categorical(
    df["tgroup"], categories=[0, 1, 2], ordered=True
).rename_categories(["Control", "Treatment 1", "Treatment 2"])


In [6]:
# Descriptive table of ability and age, with one column per treatment group.
dt1 = DTable(
    df,
    vars=["ability", "age"],
    bycol=["tgroup"],
    stats=["mean_newline_std"],
    counts_row_below=True,
    hide_stats=True,
    labels=labels,
)
dt1

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),100.57 (15.90),100.92 (15.30),99.96 (13.98)
Age (years),40.23 (12.31),37.86 (11.48),40.34 (11.85)
nobs,nobs,nobs,nobs
N,324,320,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).




In [7]:
# Export the descriptive table in tex format to the docs folder.
dt1.save(
    type="tex",
    file_name="C:/PythonProjects/tabout/docs/test_tab2",
    replace=True,
)

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),100.57 (15.90),100.92 (15.30),99.96 (13.98)
Age (years),40.23 (12.31),37.86 (11.48),40.34 (11.85)
nobs,nobs,nobs,nobs
N,324,320,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).


In [None]:
# Push the descriptive table into the existing Word document as table number 1.
dt1.update_docx(
    file_name="C:/PythonProjects/tabout/docs/PaperTest.docx",
    tab_num=1,
    show=False,
)

In [None]:
# Build the same descriptive table again under the name `dt` ...
dt = DTable(
    df,
    vars=["ability", "age"],
    bycol=["tgroup"],
    stats=["mean_newline_std"],
    counts_row_below=True,
    labels=labels,
    hide_stats=True,
)

# ... and write it into the Word document as table number 1, this time with show=True.
dt.update_docx(
    file_name="C:/PythonProjects/tabout/docs/PaperTest.docx",
    tab_num=1,
    show=True,
)

Unnamed: 0,Control,Treatment 1,Treatment 2
stats,stats,stats,stats
Ability (test score),100.57 (15.90),100.92 (15.30),99.96 (13.98)
Age (years),40.23 (12.31),37.86 (11.48),40.34 (11.85)
nobs,nobs,nobs,nobs
N,324,320,356
Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).,Note: Displayed statistics are Mean (Std. Dev.).


In [10]:
# # Create a simple index dataframe with random data
# simple_index = ['Var 1', 'Var 2', 'Var 3', 'Var 4', 'Var 5', 'Var 6']
# simple_col_index = ['Hello', 'There', 'France', 'US', 'High', 'Low', 'Extra1', 'Extra2']
# df = pd.DataFrame(np.random.randn(6, 8).round(3), index=simple_index, columns=simple_col_index)
# tab_out = TabOut(df, notes="Sample notes", caption="Sample Table 1", tab_label="tab_sample")

# # Generate tables in different formats
# ##tab_out.make(types=["gt", "docx"], path="C:/PythonProjects/pyregtable/")

