In [1]:
import pandas as pd
import numpy as np

In [2]:
# Observation table: one row per lease with the reference date (t0)
# at which payment features are evaluated.
obs = pd.DataFrame(
    {
        "lease_id": ["L1", "L2", "L3"],
        "t0": pd.to_datetime(["2025-10-01", "2025-09-15", "2025-08-30"]),
    }
)
obs.head()

Unnamed: 0,lease_id,t0
0,L1,2025-10-01
1,L2,2025-09-15
2,L3,2025-08-30


In [6]:
# Toy payment history used to exercise the window features.
# Each row is (lease_id, pay_date, status).
payments = pd.DataFrame(
    [
        # lease L1: two payments in last 90d, one on_time one late; one older payment
        ("L1", "2025-09-20", "on_time"),
        ("L1", "2025-08-25", "late"),
        ("L1", "2025-01-10", "on_time"),
        # lease L2: no payments in 30d but 2 in 180d (both on_time)
        ("L2", "2025-07-20", "on_time"),
        ("L2", "2025-06-15", "on_time"),
        # lease L3: no payments at all (new or missing data)
    ],
    columns=["lease_id", "pay_date", "status"],
)
# Parse the date strings so they can be compared against t0 later.
payments["pay_date"] = pd.to_datetime(payments["pay_date"])
payments.head()

Unnamed: 0,lease_id,pay_date,status
0,L1,2025-09-20,on_time
1,L1,2025-08-25,late
2,L1,2025-01-10,on_time
3,L2,2025-07-20,on_time
4,L2,2025-06-15,on_time


In [35]:

def build_payment_window_features(payments: pd.DataFrame,
                                  obs: pd.DataFrame,
                                  lease_id_col: str = "lease_id",
                                  pay_date_col: str = "pay_date",
                                  status_col: str = "status",
                                  t0_col: str = "t0",
                                  windows_days=(30, 90, 180),
                                  on_time_label="on_time"):
    """
    Build rolling-window payment features for each observation row at its t0.

    Parameters
    ----------
    payments : DataFrame with at least the columns
        [lease_id_col, pay_date_col, status_col]; other columns are ignored.
    obs : DataFrame with at least the columns [lease_id_col, t0_col].
        A lease may appear several times (same or different t0); every row
        gets the features computed for its own (lease, t0) pair.
    lease_id_col, pay_date_col, status_col, t0_col : column names.
    windows_days : iterable of window lengths in days. Repeated lengths
        are computed once.
    on_time_label : value of status_col that counts as an on-time payment.

    Returns
    -------
    A copy of obs (original row order, fresh RangeIndex) with added columns:
      - on_time_rate_all, num_payments_all
          computed over payments with pay_date <= t0
      - on_time_rate_{w}, num_payments_{w}
          computed over payments with t0 - w days < pay_date <= t0
      - trend_on_time_rate_{w}_vs_all = on_time_rate_{w} - on_time_rate_all
    for each w in windows_days.

    When a (lease, t0) pair has no payments in a window the defaults are:
      on_time_rate = 1.0 (assume no late payments recorded)
      num_payments = 0

    Notes
    -----
    num_payments_* counts payments whose status is non-null; the rate's
    denominator is every payment in the window (a null status is treated
    as "not on time").
    """
    key_cols = [lease_id_col, t0_col]
    flag_col = "__is_on_time__"  # internal helper column, never returned

    # Work on copies so the caller's frames are not mutated.
    obs = obs.copy()
    obs[t0_col] = pd.to_datetime(obs[t0_col])

    # Keep only the columns we need: avoids suffix collisions in the merge
    # below if payments happens to carry a column named like t0_col.
    payments = payments[[lease_id_col, pay_date_col, status_col]].copy()
    payments[pay_date_col] = pd.to_datetime(payments[pay_date_col])

    # One row per distinct (lease, t0). De-duplicating here is essential:
    # joining payments against repeated obs rows would otherwise multiply
    # the payments and inflate every count.
    obs_keys = obs[key_cols].drop_duplicates()

    # Pair every payment with each t0 of its lease (inner join drops payments
    # of leases that are not observed) and keep only history up to t0.
    history = payments.merge(obs_keys, on=lease_id_col, how="inner")
    history = history[history[pay_date_col] <= history[t0_col]]
    history = history.assign(**{flag_col: history[status_col] == on_time_label})

    # Loop-invariant: how long before t0 each payment happened.
    age = history[t0_col] - history[pay_date_col]

    def _summarise(frame, suffix):
        # Aggregate per (lease, t0): share of on-time payments and payment count.
        grouped = frame.groupby(key_cols)
        return pd.DataFrame({
            f"on_time_rate_{suffix}": grouped[flag_col].mean(),
            f"num_payments_{suffix}": grouped[status_col].count(),
        }).reset_index()

    def _attach(feats, frame, suffix):
        # Left-join the aggregates and fill pairs without payments with defaults.
        feats = feats.merge(_summarise(frame, suffix), on=key_cols, how="left")
        rate_col = f"on_time_rate_{suffix}"
        count_col = f"num_payments_{suffix}"
        feats[rate_col] = feats[rate_col].fillna(1.0)
        feats[count_col] = feats[count_col].fillna(0).astype(int)
        return feats

    # Lifetime features first, then one group of columns per window.
    feats = _attach(obs_keys, history, "all")

    for w in dict.fromkeys(windows_days):  # de-duplicate, keep caller's order
        # pay_date > t0 - w days  <=>  (t0 - pay_date) < w days
        recent = history[age < pd.Timedelta(days=w)]
        feats = _attach(feats, recent, w)

        # trend: recent window rate minus lifetime rate
        feats[f"trend_on_time_rate_{w}_vs_all"] = (
            feats[f"on_time_rate_{w}"] - feats["on_time_rate_all"]
        )

    # feats has exactly one row per (lease, t0), so this left merge keeps
    # obs's row count and order.
    return obs.merge(feats, on=key_cols, how="left")

In [36]:
# Compute the payment-window features for the toy data using the default
# column names, windows and on-time label, then display the result.
enriched = build_payment_window_features(payments=payments, obs=obs)
enriched

Unnamed: 0,lease_id,t0,on_time_rate_all,num_payments_all,on_time_rate_30,num_payments_30,trend_on_time_rate_30_vs_all,on_time_rate_90,num_payments_90,trend_on_time_rate_90_vs_all,on_time_rate_180,num_payments_180,trend_on_time_rate_180_vs_all
0,L1,2025-10-01,0.666667,3,1.0,1,0.333333,0.5,2,-0.166667,0.5,2,-0.166667
1,L2,2025-09-15,1.0,2,1.0,0,0.0,1.0,1,0.0,1.0,2,0.0
2,L3,2025-08-30,1.0,0,1.0,0,0.0,1.0,0,0.0,1.0,0,0.0
