# Imports

In [1]:
%run ../imports.ipynb

from cpn.data.sources.compustat import CompustatAnnual

# Gate for the file-writing step at the end of the notebook: when truthy the
# rendered LaTeX table is written to disk; the table is printed either way.
WRITEOUT = True

Set LOG_FORMAT="%(levelname)s::%(message)s" to change log format.
IPython: 7.21.0
Numpy (np): 1.20.3
Scipy (sp, stats): 1.6.3
Pandas (pd): 1.2.3
MatPlotLib (mpl, plt): 3.4.2
Seaborn (sns): 0.11.0
Scikit-Learn (sk): 0.24.2
Statsmodels (sm): 0.12.2
Patsy (pt): 0.5.1
SQLAlchemy (sa): 1.3.20
Gensim: 3.8.3


In [None]:
# Percentiles requested from describe(); they yield the '1%' ... '99%' labels.
describe_pctiles = [.01, .25, .5, .75, .99]

# Statistics selected for the summary table, in display order.
# NOTE(review): `S` is not defined in this notebook -- presumably a
# whitespace-splitting helper pulled in by imports.ipynb; confirm.
describe_cols = S('count mean std 1% 25% 50% 75% 99%')

# Lookup from raw names to the labels shown in the published table.
rename_dict = {
    # Variables
    'log_at': 'Log Total Assets',
    'mve': 'Market Value Equity',
    # Statistics
    'count': 'Observations',
    'mean': 'Mean',
    'std': 'Std. Dev.',
    'min': 'Min',
    '50%': 'Median',
    'max': 'Max',
}

In [None]:
def latex_format(num_in):
    """Format one table cell with precision that depends on its magnitude.

    Parameters
    ----------
    num_in : object
        Value to format. Anything ``float()`` accepts is treated as a number;
        everything else is passed through as text.

    Returns
    -------
    str
        * ``"0"`` for zero.
        * ``|x| >= 100``: rounded to an integer with thousands separators
          (e.g. ``4176.234 -> "4,176"``).
        * ``10 < |x| < 100``: one decimal place.
        * ``1 < |x| <= 10``: two decimal places.
        * ``|x| <= 1``: three decimal places.
        * ``str(num_in)`` when the value cannot be converted to a float.

    Notes
    -----
    Magnitudes are compared directly rather than through ``log10`` so that a
    zero input does not trigger a divide-by-zero warning. Large values are
    rounded (not truncated), and infinities format as ``"inf"``/``"-inf"``
    instead of raising.
    """
    try:
        num_in = float(num_in)
    except (TypeError, ValueError):
        # Not numeric (a label, None, ...): emit it verbatim.
        return str(num_in)

    if num_in == 0:
        return "0"

    magnitude = abs(num_in)
    if magnitude >= 100:
        # ',.0f' rounds to the nearest integer; int() would truncate toward zero.
        return f"{num_in:,.0f}"
    if magnitude > 10:
        return f"{num_in:2.1f}"
    if magnitude > 1:
        return f"{num_in:2.2f}"
    # Also reached for NaN, whose comparisons are all False; it formats as "nan".
    return f"{num_in:1.3f}"

# Eyeball the formatter on the same digits scaled from 1e-1 up to 1e3.
for exponent in range(-1, 4):
    sample = 4.176234*10**exponent
    print(latex_format(sample))

# Panel A: Summary Stats

In [None]:
# Source frame for the panel, taken from the project's CompustatAnnual accessor.
df = CompustatAnnual.data

# Report how many rows were loaded, with thousands separators.
_n_rows = len(df)
print(f"Length: {_n_rows:,d}")

In [None]:
# Panel A: one row per variable, one column per selected describe() statistic.
sA = (df['log_at mve'.split()]
      .describe(percentiles=describe_pctiles)
      .T
      [describe_cols]
      # After the transpose the variables sit on the index and the statistics
      # on the columns, so both axes need the label mapping; renaming only the
      # columns would leave the rows as the raw 'log_at' / 'mve' names.
      .rename(index=rename_dict, columns=rename_dict)
)
sA

In [None]:
# Render Panel A as LaTeX, running every column through latex_format.
tex = sA.to_latex(na_rep='', formatters=[latex_format]*len(sA.columns))

# The panel title has to span the index column(s) plus every statistic column,
# so derive the width from the frame instead of hard-coding it.
_n_tex_cols = sA.index.nlevels + len(sA.columns)
_panel_header = (r'\toprule \multicolumn{' + str(_n_tex_cols)
                 + r'}{c}{\textit{Panel A}: Summary Stats} \\ \midrule')
tex = tex.replace(r'\toprule', _panel_header)

if WRITEOUT:
    # Write the same table to both configured output directories.
    for _folder in (config.TABLE_DIR, config.TABLE_DIR_OVERLEAF):
        fp = os.path.join(_folder, 'summary_stats_panel_A.tex')
        with open(fp, 'w') as fh:
            fh.write(tex)
        # Report only once the file has actually been written.
        print(f"Wrote: {fp}")
print('\n', tex)