In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv("minwage.txt", delim_whitespace=True)

In [5]:
num_cols = [
    "empft", "emppt", "nmgrs",
    "empft2", "emppt2", "nmgrs2"
]

for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

In [6]:
df["fte"]  = df["empft"]  + df["nmgrs"]  + 0.5 * df["emppt"]
df["fte2"] = df["empft2"] + df["nmgrs2"] + 0.5 * df["emppt2"]

In [7]:
cols_required = [
    "wagest", "wagest2",
    "empft", "emppt", "nmgrs",
    "empft2", "emppt2", "nmgrs2"
]

df_clean = df.dropna(subset=cols_required).copy()

In [8]:
# Before: FTE by state
pa_before = df_clean.loc[df_clean["state"] == 0, "fte"].mean()
nj_before = df_clean.loc[df_clean["state"] == 1, "fte"].mean()

# After: FTE2 by state
pa_after = df_clean.loc[df_clean["state"] == 0, "fte2"].mean()
nj_after = df_clean.loc[df_clean["state"] == 1, "fte2"].mean()

# Diffs within state (After - Before)
pa_diff = pa_after - pa_before
nj_diff = nj_after - nj_before

# Difference-in-differences (NJ - PA)
did = nj_diff - pa_diff

pa_before, nj_before, pa_after, nj_after, pa_diff, nj_diff, did

(23.38,
 20.430582524271845,
 21.096666666666668,
 20.89724919093851,
 -2.2833333333333314,
 0.466666666666665,
 2.7499999999999964)

In [10]:
import pandas as pd

did_table = pd.DataFrame({
    "PA": [
        pa_before,
        pa_after,
        pa_diff
    ],
    "NJ": [
        nj_before,
        nj_after,
        nj_diff
    ],
    "NJ - PA": [
        nj_before - pa_before,
        nj_after - pa_after,
        did
    ]
}, index=["Before", "After", "After - Before"])

did_table

Unnamed: 0,PA,NJ,NJ - PA
Before,23.38,20.430583,-2.949417
After,21.096667,20.897249,-0.199417
After - Before,-2.283333,0.466667,2.75


In [16]:
import pandas as pd
import statsmodels.formula.api as smf

# "Before" observations
before = df_clean[["state", "chain", "own", "fte"]].copy()
before["post"] = 0   # pre-policy
before = before.rename(columns={"fte": "fte"})

# "After" observations
after = df_clean[["state", "chain", "own", "fte2"]].copy()
after["post"] = 1    # post-policy
after = after.rename(columns={"fte2": "fte"})

# Stack them
df_long = pd.concat([before, after], ignore_index=True)

# Treatment indicator: NJ = 1, PA = 0
df_long["treat"] = df_long["state"]

# DiD interaction
df_long["did"] = df_long["treat"] * df_long["post"]


In [17]:
mod_b = smf.ols("fte ~ treat + post + did", data=df_long).fit(cov_type="HC1")
print(mod_b.summary().tables[1])  # coefficient table

print("DiD estimate (part b):", mod_b.params["did"])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     23.3800      1.381     16.928      0.000      20.673      26.087
treat         -2.9494      1.477     -1.996      0.046      -5.845      -0.054
post          -2.2833      1.684     -1.356      0.175      -5.584       1.017
did            2.7500      1.843      1.492      0.136      -0.862       6.362
DiD estimate (part b): 2.750000000000006


In [18]:
mod_c = smf.ols("fte ~ treat + post + did + C(chain) + own",
                data=df_long).fit(cov_type="HC1")
print(mod_c.summary().tables[1])

print("DiD estimate (part b):", mod_b.params["did"])
print("DiD estimate (part c, with controls):", mod_c.params["did"])


                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        26.0856      1.362     19.154      0.000      23.416      28.755
C(chain)[T.2]   -10.6789      0.644    -16.593      0.000     -11.940      -9.418
C(chain)[T.3]    -1.8384      0.872     -2.108      0.035      -3.548      -0.129
C(chain)[T.4]    -1.1819      1.027     -1.151      0.250      -3.195       0.831
treat            -2.3538      1.299     -1.812      0.070      -4.899       0.192
post             -2.2833      1.459     -1.565      0.118      -5.144       0.577
did               2.7500      1.610      1.708      0.088      -0.406       5.906
own              -1.0830      0.646     -1.678      0.093      -2.348       0.182
DiD estimate (part b): 2.750000000000006
DiD estimate (part c, with controls): 2.7499999999999893


In [22]:
import numpy as np
import statsmodels.formula.api as smf

# Start from df_clean
df_gap = df_clean.copy()

# 1) Ensure wagest is numeric
df_gap["wagest"] = pd.to_numeric(df_gap["wagest"], errors="coerce")

# 2) Create gap
target_min = 5.05  # NJ new minimum wage in Card & Krueger

df_gap["gap"] = 0.0
nj_mask = df_gap["state"] == 1  # 1 = NJ

# For NJ stores: max(0, 5.05 - starting wage)
df_gap.loc[nj_mask, "gap"] = (target_min - df_gap.loc[nj_mask, "wagest"]).clip(lower=0)

# 3) Change in full-time employment (not FTE)
df_gap["d_empft"] = df_gap["empft2"] - df_gap["empft"]

# 4) Drop any rows with missing values in these variables
df_gap = df_gap.dropna(subset=["gap", "d_empft"])


In [23]:
mod_gap = smf.ols("d_empft ~ gap", data=df_gap).fit(cov_type="HC1")
print(mod_gap.summary().tables[1])

beta_gap = mod_gap.params["gap"]
mean_gap_nj = df_gap.loc[nj_mask, "gap"].mean()
effect_gap = beta_gap * mean_gap_nj

print("Coefficient on gap:", beta_gap)
print("Mean gap among NJ restaurants:", mean_gap_nj)
print("Predicted effect beta*mean_gap:", effect_gap)


                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.8285      0.871     -2.099      0.036      -3.536      -0.121
gap            4.5582      1.584      2.878      0.004       1.454       7.662
Coefficient on gap: 4.558178338825757
Mean gap among NJ restaurants: 0.462286689419795
Predicted effect beta*mean_gap: 2.10718517404078


In [24]:
import statsmodels.formula.api as smf

# Regression: Î”empft ~ gap + state + controls
mod_e = smf.ols(
    "d_empft ~ gap + state + C(chain) + own",
    data=df_gap
).fit(cov_type="HC1")

print(mod_e.summary().tables[1])

                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        -2.1566      1.802     -1.197      0.231      -5.688       1.374
C(chain)[T.2]     0.1408      1.241      0.113      0.910      -2.292       2.573
C(chain)[T.3]    -2.2130      1.267     -1.747      0.081      -4.696       0.270
C(chain)[T.4]    -1.5782      1.931     -0.817      0.414      -5.362       2.206
gap               3.3237      1.795      1.852      0.064      -0.194       6.841
state             1.2990      1.766      0.736      0.462      -2.161       4.759
own               1.3344      1.030      1.295      0.195      -0.685       3.353


In [25]:
beta_state = mod_e.params["state"]
p_state = mod_e.pvalues["state"]

print("Coefficient on state:", beta_state)
print("p-value:", p_state)


Coefficient on state: 1.2990440002924557
p-value: 0.4618614230188677
