# 01 - Load + Clean (Individual Encounters Only)

Loads the combined export, standardizes column names, and filters for:
- Closed encounters
- Billable = Yes
- Individual (no group)
- No group CPT 90853 rows (removed if present)

Then writes a cleaned file for revenue modeling.

In [None]:
import sys
from pathlib import Path
import pandas as pd

# Locate the project root: the working directory itself, or its parent when
# the notebook is launched from inside the "notebooks" folder.
ROOT = Path.cwd().resolve()
ROOT = ROOT.parent if ROOT.name == "notebooks" else ROOT

# Put the project root first on the import path (only once) so that
# project packages such as `src` can be imported from this notebook.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from src.cleaning import standardize_columns, filter_individual_closed_billable  # noqa: E402

## Load the combined dataset

In [None]:
# Location of the combined export inside the project's data folder.
DATA_PATH = ROOT.joinpath("data", "sample_revenue_Oct22-Jan26.csv")
df_raw = pd.read_csv(DATA_PATH)

# Report the row count and the column names as loaded, then preview the
# first rows (the trailing expression is what the notebook displays).
print("Raw rows:", df_raw.shape[0])
print(list(df_raw.columns))
df_raw.head()

## Standardize column names and filter

In [None]:
# Standardize the raw columns first, then apply the
# individual / closed / billable filter to the standardized frame.
df = filter_individual_closed_billable(standardize_columns(df_raw))

# Show how many rows survive the filter and preview them.
print("Cleaned rows:", df.shape[0])
df.head()

## Quick checks (facility counts + CPT counts)

In [None]:
# Print the most frequent values per column (missing values are counted
# too): the top 10 facilities, then the top 20 CPT codes.
for _col, _top_n in (("facility", 10), ("cpt_code", 20)):
    print(df[_col].value_counts(dropna=False).head(_top_n))

## Save cleaned file (local use)

In [None]:
# Destination for the cleaned encounters, in the project's data folder.
CLEAN_PATH = ROOT.joinpath("data", "clean_encounters.csv")

# index=False keeps the DataFrame index out of the written file.
df.to_csv(path_or_buf=CLEAN_PATH, index=False)
print("Wrote:", CLEAN_PATH)