In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('elasticities.csv')

In [3]:
    
def parse_econ_latex_table(df):
    dfstd = df[df.applymap(lambda x: "(" in x)].dropna().copy().reset_index(drop=True)
    dfstd = dfstd.applymap(lambda x: float(x.replace("(", "").replace(")", "")) )
    dfmean = df[df.applymap(lambda x: "(" not in x)].dropna().copy().reset_index(drop=True)
    records = []
    for (idx, row), (jdx, srow) in zip(dfmean.iterrows(), dfstd.iterrows()):
        for col in dfmean.columns:
            records += [{
            'mean': row[col].replace("^{* * *}", "").replace("^{* *}", "").replace("^*", ""),
            'std': srow[col],
            'experiment': col.split("-")[-1],
            'significance' : 0.1 if '^*' in row[col] else 0.05 if '^{* *}' in row[col] else 0.01 if '^{* * *}' in row[col] else pd.np.nan,
            'row': idx,
            }]
    dfv = pd.DataFrame(records)
    techniques = [
        {"name": "IV: (lagged) mechanical tax rate changes", "row": 0},
        {"name":"IV-other", "row":1},
        {"name":"DID-IV", "row":2},
        {"name":"DID-classic", "row":3},
        {"name":"Income Control (none)", "row":4},
        {"name":"Income Control (Gruber Saez Spline)", "row":5},
        {"name":"Income Control (Kopczuk)", "row":6},
        {"name":"Income Control (other)", "row":7},
        {"name":"Difference Length (1 year)", "row":8},
        {"name":"Difference Length (2 years)", "row":9},
        {"name":"Difference Length (4+ years)", "row":10},
    ]

    dft = pd.DataFrame(techniques)
    experiments = [
        {"experiment": "1", "control": "No Control"},
        {"experiment": "2", "control": "Control for estimation technique"},
        {"experiment": "3", "control": "Control for estimation technique and sample restrictions"},
        {"experiment": "4", "control": "Control for estimation technique, sample restrictions, estimation decade"},
        {"experiment": "5", "control": "Control for estimation technique, sample restrictions, publication decade"},
        {"experiment": "6", "control": "Control for all variables"},
    ]

    dfe = pd.DataFrame(experiments)
    dfr = pd.merge(dfv, dft, on="row")
    dfr = pd.merge(dfr, dfe, on="experiment")
    return dfr

In [4]:
dfe = parse_econ_latex_table(df)

  'significance' : 0.1 if '^*' in row[col] else 0.05 if '^{* *}' in row[col] else 0.01 if '^{* * *}' in row[col] else pd.np.nan,


In [5]:
dfd = pd.read_csv('elasticities-after-deductions.csv')

In [6]:
dfd = parse_econ_latex_table(dfd)

  'significance' : 0.1 if '^*' in row[col] else 0.05 if '^{* *}' in row[col] else 0.01 if '^{* * *}' in row[col] else pd.np.nan,


In [7]:
dfd["deductions"] = "After"
dfe["deductions"] = "Before"

In [8]:
dfout = pd.concat([dfe, dfd], axis=0)

In [9]:
dfout.to_csv('elasticities-parsed.csv', index=False)
dfout.to_excel('elasticities-parsed.xlsx', index=False)
dfout.to_json('elasticities-parsed.json', orient='records')
dfout.to_parquet('elasticities-parsed.parquet')