# 03 — Link Mortality (Follow-up)

Run this cell first (it executes the bootstrap notebook):
```python
%run 00_bootstrap.ipynb
```

In [None]:
# Attrition audit: tag every row with an "include" code and report how many
# rows each exclusion step removes.
#   1 = still included, 0 = failed step 1, 2/3/4 = dropped at step 2/3/4.
# Each step only relabels rows that are still coded 1, so the order matters.

# Step 1: keep rows with ELIGSTAT == 1 and a non-missing MORTSTAT.
has_linkage = score_mort["ELIGSTAT"].eq(1) & score_mort["MORTSTAT"].notna()
score_mort["include"] = np.where(has_linkage, 1, 0)
step1_counts = score_mort["include"].value_counts().to_dict()
print(f"After ELIGSTAT & MORTSTAT inclusion: {step1_counts}")

# Step 2: drop rows with a missing HEI score or DR12DRST > 1.
# WTDRD1 <= 0 is intentionally not checked here; it is counted on its own
# in step 4.
diet_problem = (
    score_mort["HEI2015_TOTAL_SCORE"].isna()
    | score_mort["DR12DRST"].gt(1)
)
step2_cond = score_mort["include"].eq(1) & diet_problem
n_dropped_step2 = step2_cond.sum()
print(f"Dropped at Step 2 (diet data or recall quality): {n_dropped_step2}")
score_mort.loc[step2_cond, "include"] = 2

# Step 3: drop rows with missing FS or SNAP, or with pir == 4.
covariate_problem = (
    score_mort["FS"].isna()
    | score_mort["SNAP"].isna()
    | score_mort["pir"].eq(4)
)
step3_cond = score_mort["include"].eq(1) & covariate_problem
n_dropped_step3 = step3_cond.sum()
print(f"Dropped at Step 3 (missing FS/SNAP or pir=4): {n_dropped_step3}")
score_mort.loc[step3_cond, "include"] = 3

# Step 4: drop rows whose WTDRD1 is zero or negative.
nonpositive_weight = score_mort["WTDRD1"].le(0)
step4_cond = score_mort["include"].eq(1) & nonpositive_weight
n_dropped_step4 = step4_cond.sum()
print(f"Dropped at Step 4 (zero or negative WTDRD1): {n_dropped_step4}")
score_mort.loc[step4_cond, "include"] = 4

# Tally of every include code after all steps have run.
final_counts = score_mort["include"].value_counts().to_dict()
print(f"📊 Final include flag counts: {final_counts}")

# Keep only the rows that survived every step (include == 1).
survivors = score_mort["include"].eq(1)
score_mort = score_mort.loc[survivors].copy()
print(f" Final analytic sample size: {len(score_mort)}")


# Inclusion flag, coded the same way as the step-by-step audit above:
#   1 = kept
#   0 = fails ELIGSTAT == 1 or has a missing MORTSTAT
#   2 = missing HEI2015_TOTAL_SCORE or DR12DRST > 1
#   3 = missing FS or SNAP, or pir == 4
#   4 = WTDRD1 <= 0
# Each later step only relabels rows that are still coded 1, so the order of
# the assignments matters.
# NOTE: if the audit above has already filtered score_mort down to
# include == 1, this recomputation leaves every remaining row coded 1.
score_mort["include"] = np.where(
    (score_mort["ELIGSTAT"] == 1) & (score_mort["MORTSTAT"].notna()), 1, 0
)

# Code 2: diet data missing or recall status above 1.
# The WTDRD1 <= 0 check is deliberately NOT part of this step: it used to be
# duplicated here, which made the code-4 assignment below unreachable and
# disagreed with the audit's step 2.
score_mort.loc[
    (score_mort["include"] == 1) &
    (
        score_mort["HEI2015_TOTAL_SCORE"].isna() |
        (score_mort["DR12DRST"] > 1)
    ),
    "include"
] = 2

# Code 3: missing FS/SNAP or pir == 4.
score_mort.loc[
    (score_mort["include"] == 1) &
    (
        score_mort["FS"].isna() |
        score_mort["SNAP"].isna() |
        (score_mort["pir"] == 4)
    ),
    "include"
] = 3

# Code 4: zero or negative WTDRD1.
score_mort.loc[
    (score_mort["include"] == 1) & (score_mort["WTDRD1"] <= 0),
    "include"
] = 4

# Insurance binary: 0 where ins == 0, otherwise 1.
# NOTE(review): rows where the comparison is False (including NaN in a float
# column) also receive 1 -- confirm that is the intended coding for missing ins.
ins_is_zero = score_mort["ins"].eq(0)
score_mort["ins2"] = np.where(ins_is_zero, 0, 1)

# Unemployment indicator: 1 where employ > 1, otherwise 0.
# NOTE(review): a NaN employ compares False and therefore receives 0 -- confirm.
employ_above_one = score_mort["employ"].gt(1)
score_mort["unemployment2"] = np.where(employ_above_one, 1, 0)

# Restrict the frame to the analytic sample (rows with include == 1).
kept_rows = score_mort["include"].eq(1)
score_mort = score_mort.loc[kept_rows].copy()

# Persist the final dataset as a pickle inside folder_path
# (.csv or .parquet would work as well).
mort_pickle_path = os.path.join(folder_path, "SODH_diet_mort.pkl")
score_mort.to_pickle(mort_pickle_path)