## Load and Preprocess

In [1]:
from skms import load_aidssi

# Load the AIDS/SI example data as a DataFrame.
# NOTE(review): prepare=True presumably returns the long layout shown in the
# output below (one row per patient and candidate target state) — confirm
# against the skms documentation.
df = load_aidssi(prepare=True)
# Bare last expression so the notebook renders the (truncated) frame.
df

Unnamed: 0,patnr,tstart,tstop,origin_state,target_state,status,ccr5
0,1,0,9.106,0,1,1,WW
1,1,0,9.106,0,2,0,WW
2,2,0,11.039,0,1,0,WM
3,2,0,11.039,0,2,0,WM
4,3,0,2.234,0,1,1,WW
...,...,...,...,...,...,...,...
653,327,0,5.314,0,2,1,WW
654,328,0,10.117,0,1,1,WW
655,328,0,10.117,0,2,0,WW
656,329,0,2.631,0,1,0,WM


In [4]:
from skms.visualization import StateDiagramGenerator

# Human-readable names for the state codes found in origin_state/target_state.
state_labels = {0: "Event-free", 1: "AIDS", 2: "SI"}
# AIDS (1) and SI (2) are the two competing end points; the previous value
# [3] referred to a state that is neither labelled above nor present in the
# target_state column.
# NOTE(review): assumes `terminal_states` lists the absorbing states of the
# diagram — confirm against the StateDiagramGenerator documentation.
terminal_states = [1, 2]

# Map the dataset's column names onto the roles the generator expects.
sdg = StateDiagramGenerator(
    dataset=df,
    patient_id='patnr',
    from_state='origin_state',
    to_state='target_state',
    tstart='tstart',
    tstop='tstop',
    status='status',
    state_labels=state_labels,
    terminal_states=terminal_states
)

sdg.plot_state_diagram()

In [None]:
import pandas as pd


def counterfactual_to_competing_risks_format(df, covariate_cols=None):
    """
    Collapse counterfactual (long) data to one competing-risks row per patient.

    Rows with ``status == 1`` are kept as observed events. Every patient that
    has no such row contributes a single censored row, built from that
    patient's first row with ``status`` and ``target_state`` set to 0.

    Parameters:
    -----------
    df : DataFrame
        Counterfactual format data. Must contain the columns ``patnr``,
        ``tstop``, ``target_state`` and ``status``. It is not modified.
    covariate_cols : str or sequence of str, optional
        Covariate column name(s) to preserve. If None, every column other
        than ``patnr``, ``tstart``, ``tstop``, ``origin_state``,
        ``target_state`` and ``status`` is kept.

    Returns:
    --------
    DataFrame
        Columns ``id``, ``duration``, ``event`` followed by the covariates,
        always with a fresh 0..n-1 index. Observed-event rows come first (in
        input order), followed by censored patients in order of first
        appearance in ``df``.

    Raises:
    -------
    KeyError
        If a required column or a requested covariate column is missing.
    """
    required_cols = ["patnr", "tstop", "target_state", "status"]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Missing required column(s): {missing_cols}")

    # Auto-detect covariates if not specified
    if covariate_cols is None:
        standard_cols = {
            "patnr",
            "tstart",
            "tstop",
            "origin_state",
            "target_state",
            "status",
        }
        covariate_cols = [col for col in df.columns if col not in standard_cols]
    elif isinstance(covariate_cols, str):
        # A bare string would otherwise be split into single characters.
        covariate_cols = [covariate_cols]
    else:
        # Accept tuples, Index objects, etc.; list + tuple would raise.
        covariate_cols = list(covariate_cols)

    # Rows where an event actually occurred
    event_rows = df[df["status"] == 1]

    # Patients without any observed event are censored: keep their first row
    # and overwrite the event coding (0 is used as the censoring code).
    # Done in one vectorized pass so row order is deterministic.
    censored_rows = (
        df[~df["patnr"].isin(event_rows["patnr"])]
        .drop_duplicates(subset="patnr", keep="first")
        .assign(status=0, target_state=0)
    )

    # Skip empty pieces so concat never has to reconcile dtypes with an
    # empty frame; the index is reset in every case for a consistent result.
    parts = [part for part in (event_rows, censored_rows) if not part.empty]
    if parts:
        combined = pd.concat(parts, ignore_index=True)
    else:
        combined = event_rows.reset_index(drop=True)

    # Rename columns to match expected format
    combined = combined.rename(
        columns={"patnr": "id", "tstop": "duration", "target_state": "event"}
    )

    # Keep necessary columns including covariates
    return combined[["id", "duration", "event"] + covariate_cols]


# Collapse the long-format frame to the id/duration/event layout, keeping
# only the "ccr5" covariate.
standard_df = counterfactual_to_competing_risks_format(df, covariate_cols=["ccr5"])
# Quick sanity check of the result's dimensions and column names.
print("Standard format shape:", standard_df.shape)
print("Standard format columns:", standard_df.columns.tolist())
# display() is used because this is not the only output of the cell.
display(standard_df.head())

Standard format shape: (329, 4)
Standard format columns: ['id', 'duration', 'event', 'ccr5']


Unnamed: 0,id,duration,event,ccr5
0,1,9.106,1,WW
1,3,2.234,1,WW
2,4,9.878,2,WM
3,5,3.819,1,WW
4,6,6.801,1,WW


## Model

In [7]:
# Build the modelling frame without touching standard_df: the genotype column
# is replaced by a 0/1 indicator (1 when ccr5 is "WW", 0 otherwise).
# NOTE(review): a missing ccr5 value compares unequal to "WW" and would be
# coded 0 — confirm the data has no missing genotypes.
model_df = standard_df.assign(ccr5=standard_df["ccr5"].eq("WW").astype(int))
model_df.head()

Unnamed: 0,id,duration,event,ccr5
0,1,9.106,1,1
1,3,2.234,1,1
2,4,9.878,2,0
3,5,3.819,1,1
4,6,6.801,1,1


In [10]:
from skms.models.base import CompetingRisksModel

# Fit the competing-risks model on the one-row-per-patient frame, telling it
# which columns hold the follow-up time and the event code.
# NOTE(review): model_df still contains the `id` column; if fit() treats every
# remaining column as a covariate, `id` should be dropped first — confirm
# against the CompetingRisksModel documentation.
crm = CompetingRisksModel()
crm.fit(model_df, duration_col='duration', event_col='event')