In [None]:
import os

# Move the working directory up one level (intended to be the project root)
os.chdir(os.pardir)

In [None]:
import os

# NOTE(review): this cell repeats the previous one, so a top-to-bottom run ends two
# levels above the kernel's starting directory, and re-running either cell climbs
# again. Confirm the second hop is intended before relying on relative paths.
os.chdir(os.pardir)  # set cwd to project root

# Survival Curve Plots
This notebook analyzes historical employee turnover by tenure cohort using Kaplan–Meier survival curves.

In [None]:
import pandas as pd
from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt

In [None]:
# Read the historical turnover records, parsing both date columns as datetimes
turnover_csv = '../data/historical_turnover.csv'
df = pd.read_csv(turnover_csv, parse_dates=['hire_date', 'termination_date'])

# Rows without a termination date are measured up to today's date, so the
# computed tenure depends on the day the notebook is run
termination = df['termination_date']
tenure = termination.fillna(pd.Timestamp.today()) - df['hire_date']
df['duration'] = tenure.dt.days / 365  # whole days converted to years (365-day year)
# 1 when a termination date is present, 0 when it is missing
df['event_observed'] = termination.notna().astype(int)
# Assign cohorts based on duration
def assign_cohort(x):
    """Map a tenure in years to its cohort label.

    Parameters
    ----------
    x : float
        Tenure in years.

    Returns
    -------
    str or None
        '0-1yr' when x <= 1, '1-3yr' when 1 < x <= 3, '3+yr' when x > 3,
        and None when x is missing (NaN/None).
    """
    # NaN fails both comparisons below, so without this guard a missing
    # duration would silently be labelled '3+yr'. Returning None leaves the
    # cohort missing instead, which pandas groupby skips by default.
    if pd.isna(x):
        return None
    if x <= 1:
        return '0-1yr'
    if x <= 3:
        return '1-3yr'
    return '3+yr'
# Label every row with the cohort that matches its tenure.
# NOTE(review): the cohort is derived from the same `duration` that the survival
# curves below model, so each curve is conditioned on its own outcome — confirm
# this stratification is what the analysis intends.
df['cohort'] = df['duration'].map(assign_cohort)

In [None]:
# Draw one Kaplan–Meier survival curve per cohort on a shared set of axes
kmf = KaplanMeierFitter()
fig, ax = plt.subplots(figsize=(8, 6))
for cohort, group in df.groupby('cohort'):
    # The same estimator is refit for each cohort; label=cohort tags the fitted curve
    kmf.fit(
        durations=group['duration'],
        event_observed=group['event_observed'],
        label=cohort,
    )
    kmf.plot_survival_function(ax=ax, ci_show=False)  # confidence bands hidden
ax.set(
    title='Survival Curves by Tenure Cohort',
    xlabel='Years Since Hire',
    ylabel='Survival Probability',
)
ax.legend()
plt.show()