In [10]:
import warnings

import numpy as np
import pandas as pd

from lifelines import CoxPHFitter, KaplanMeierFitter
from lifelines.utils import concordance_index

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [11]:
# All cohort extracts are read from one folder. Keeping the folder in a single
# constant means the notebook can be pointed at another machine by editing one line.
# The original cell spelled the folder both "DataFile" and "DataFIle"; that only
# resolves on a case-insensitive filesystem, so one spelling is used throughout.
# NOTE(review): confirm "DataFile" is the on-disk spelling.
DATA_DIR = '/Users/Apple/projects/ALS_Digital_Twins/All_processed_data/DataFile'
FP_MODEL_SUFFIX = '_processed_data_for_fp_model_21-01-2026.csv'

# Processed per-study tables (one CSV per study prefix).
lical0 = pd.read_csv(f'{DATA_DIR}/lical0{FP_MODEL_SUFFIX}')
miro0 = pd.read_csv(f'{DATA_DIR}/miro0{FP_MODEL_SUFFIX}')
miroli0 = pd.read_csv(f'{DATA_DIR}/miroli0{FP_MODEL_SUFFIX}')
ril_3010 = pd.read_csv(f'{DATA_DIR}/ril_3010{FP_MODEL_SUFFIX}')
proact0 = pd.read_csv(f'{DATA_DIR}/proact0{FP_MODEL_SUFFIX}')

# MND Register extracts.
MND_lica = pd.read_csv(f'{DATA_DIR}/MNDRegisterDataset_licals.csv')
MND_miro = pd.read_csv(f'{DATA_DIR}/MNDRegisterDataset_mirocals.csv')
MND_rilu = pd.read_csv(f'{DATA_DIR}/MNDRegisterDataset_riluzole.csv')

#### **Fit Cox in Mirocals**

In [12]:
# Preview the first three MIROCALS rows to check the columns used below
# (Disease_Duration, Event and the covariates) were loaded as expected.
miro0.head(3)

Unnamed: 0,subject_id,Event,Disease_Duration,Study_Arm_Placebo,Age,TRICALS,Diagnostic_Delay,Vital_capacity,Onset_Limb,Sex_Male,Sex_onset,Age_Sex,Age_onset,Age_TRICALS,Age_VC,Sex_VC,Onset_VC,Age_sq
0,1-62478,1,29.174444,1,1.21716,0.080673,1.067127,0.292532,1,0,0,0.0,1.21716,0.098192,0.356058,0.0,0.292532,1.48148
1,1-62479,1,24.144657,0,0.679084,1.342729,1.128623,-0.914676,0,0,0,0.0,0.0,0.911826,-0.621142,-0.0,-0.0,0.461155
2,1-62480,0,31.308967,0,-0.134154,-0.436462,0.072996,0.107244,1,1,1,-0.134154,-0.134154,0.058553,-0.014387,0.107244,0.107244,0.017997


In [13]:
# Covariates of the development (MIROCALS) Cox model.
covs = ["Age", "Diagnostic_Delay", "Vital_capacity"]

# Fit on only the duration, event indicator and the chosen covariates.
cox_dev = CoxPHFitter()
cox_dev.fit(
    miro0.loc[:, ["Disease_Duration", "Event", *covs]],
    duration_col="Disease_Duration",
    event_col="Event",
)

# Compact view of the fit: log-hazard ratio, hazard ratio and p-value per covariate.
summary_cols = ['coef', 'exp(coef)', 'p']
print(cox_dev.summary[summary_cols])

                      coef  exp(coef)         p
covariate                                      
Age               0.312315   1.366585  0.006571
Diagnostic_Delay -0.469576   0.625267  0.000014
Vital_capacity   -0.363950   0.694926  0.001042


### **Predict survival in LICALS using MIROCALS model (transport)**

In [24]:
# Individual predicted survival curves for LICALS from the MIROCALS-fitted model.
S_pred_lica = cox_dev.predict_survival_function(lical0[covs])

# Average predicted survival curve across the LICALS case-mix.
# The index of the result is the time grid produced by lifelines.
S_pred_mean = S_pred_lica.mean(axis=1)

# Observed LICALS Kaplan-Meier curve, forward-filled ("pad") onto the prediction
# time grid so both curves are evaluated at the same time points.
km = KaplanMeierFitter().fit(lical0["Disease_Duration"], lical0["Event"])
S_obs = km.survival_function_["KM_estimate"].reindex(S_pred_mean.index, method="pad")

# Simple curve-distance summary: integrated absolute error between the curves.
# np.trapezoid replaces the deprecated np.trapz and matches the later cells.
iae = np.trapezoid(np.abs(S_obs.values - S_pred_mean.values), x=S_pred_mean.index.values)
print("Integrated abs error (unweighted LICALS):", iae)

Integrated abs error (unweighted LICALS): 2.2588000905085868


  iae = np.trapz(np.abs(S_obs.values - S_pred_mean.values), x=S_pred_mean.index.values)


#### **Optional: a quick “calibration-in-the-large” check at a fixed horizon t0 (the cell below uses t0 = 25.0, in the time units of `Disease_Duration`):**

In [15]:
# Fixed evaluation horizon, on the same time scale as the survival curves' index.
t0 = 25.0

# Survival at t0 is read off each curve as the value at the last grid time <= t0.
pred_up_to_t0 = S_pred_mean[S_pred_mean.index <= t0]
obs_up_to_t0 = S_obs[S_obs.index <= t0]
S_pred_t0 = float(pred_up_to_t0.iloc[-1])
S_obs_t0 = float(obs_up_to_t0.iloc[-1])

# Risk is the complement of survival.
risk_pred_t0 = 1 - S_pred_t0
risk_obs_t0 = 1 - S_obs_t0

print("Pred risk @t0:", risk_pred_t0, "Obs risk @t0:", risk_obs_t0, "Diff:", risk_pred_t0-risk_obs_t0)

Pred risk @t0: 0.23111770813651944 Obs risk @t0: 0.21772849992975207 Diff: 0.013389208206767367


What it means (at $t_0$)

Predicted risk (MIROCALS model applied to LICALS) at $t_0$: 0.231

Observed risk (LICALS KM) at $t_0$: 0.218

Difference (Pred − Obs): +0.0134 (about +1.34 percentage points)

So at that horizon, the MIROCALS model is slightly over-predicting risk in LICALS (i.e., predicting slightly worse survival than observed).

Whether that’s “good enough” depends on your sample size and what $t_0$ is, but numerically it’s small.

#### **3) Delay-only weighting: make LICALS’ delay look like MIROCALS’ delay**
3a) Learn “study membership” using delay only

We fit: P(MIRO=1 | Delay). Then weight LICALS patients by odds of being MIRO-like.

In [16]:
# Combine both cohorts for the propensity ("study membership") model,
# labelling each row with the study it came from.
miro_tmp = miro0.assign(is_miro=1)
lica_tmp = lical0.assign(is_miro=0)
comb = pd.concat([miro_tmp, lica_tmp], ignore_index=True)

X = comb[["Diagnostic_Delay"]].values
y = comb["is_miro"].values

# Logistic regression with scaling for stability
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("lr", LogisticRegression(max_iter=500))
])
pipe.fit(X, y)

# P(MIRO = 1 | delay) for each LICALS individual, clipped away from 0 and 1
# so the odds below stay finite.
p_miro_lica = pipe.predict_proba(lical0[["Diagnostic_Delay"]].values)[:, 1]
p_miro_lica = np.clip(p_miro_lica, 1e-4, 1 - 1e-4)

# Odds weights for LICALS: up-weight patients who look MIRO-like.
w_delay = p_miro_lica / (1 - p_miro_lica)

# Optional weight trimming (cap at the 99th percentile) to avoid instability
w_delay = np.minimum(w_delay, np.quantile(w_delay, 0.99))

# Guard against a silent no-op: if every patient gets the same weight, the weighted
# KM equals the unweighted KM and the comparison below says nothing about delay.
# A main-effects logistic model returns a constant probability whenever the
# covariate has the same mean in both cohorts.
# NOTE(review): the inputs look z-scored; if each study was standardised
# separately, equal means are guaranteed by construction - confirm against the
# preprocessing step.
if w_delay.max() - w_delay.min() <= 1e-8 * w_delay.mean():
    warnings.warn(
        "Delay-only propensity weights are numerically constant, so reweighting "
        "LICALS is a no-op. Check that 'Diagnostic_Delay' is on a common scale in "
        "both cohorts (not standardised separately within each study)."
    )

lica_w = lical0.copy()
lica_w["w_delay"] = w_delay
print(lica_w["w_delay"].describe())

count    1.760000e+02
mean     1.249968e+00
std      4.101519e-15
min      1.249968e+00
25%      1.249968e+00
50%      1.249968e+00
75%      1.249968e+00
max      1.249968e+00
Name: w_delay, dtype: float64


3b) Weighted observed KM in LICALS

In [18]:
# Kaplan-Meier for LICALS with each patient weighted by the delay-only odds weight.
km_w = KaplanMeierFitter().fit(lica_w["Disease_Duration"], lica_w["Event"], weights=lica_w["w_delay"])

# Forward-fill the weighted KM onto the prediction time grid so the curves line up.
S_obs_w = km_w.survival_function_["KM_estimate"].reindex(S_pred_mean.index, method="pad")

# Integrated absolute error against the mean predicted curve.
# np.trapezoid replaces the deprecated np.trapz and matches the later cells.
iae_w_delay = np.trapezoid(np.abs(S_obs_w.values - S_pred_mean.values), x=S_pred_mean.index.values)
print("Integrated abs error (LICALS weighted on delay):", iae_w_delay)

# Positive values mean the weighted curve is closer to the prediction than the unweighted one.
print("Improvement:", iae - iae_w_delay)

Integrated abs error (LICALS weighted on delay): 2.2588000905085948
Improvement: -7.993605777301127e-15


  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  
  iae_w_delay = np.trapz(np.abs(S_obs_w.values - S_pred_mean.values), x=S_pred_mean.index.values)


In [None]:
# Common time grid: the index of the mean predicted survival curve.
grid = S_pred_mean.index.values

# Put all three curves on that grid; the observed step functions are forward-filled.
S_pred_aligned = S_pred_mean.reindex(grid).values
S_obs_aligned = S_obs.reindex(grid, method="pad").values
S_obs_w_aligned = S_obs_w.reindex(grid, method="pad").values

# Absolute gap to the prediction, without and with delay weighting.
gap_unweighted = np.abs(S_obs_aligned - S_pred_aligned)
gap_weighted = np.abs(S_obs_w_aligned - S_pred_aligned)

iae = np.trapezoid(gap_unweighted, x=grid)
iae_w_delay = np.trapezoid(gap_weighted, x=grid)
print("Integrated abs error (LICALS weighted on delay):", iae_w_delay)
print("Improvement:", iae - iae_w_delay)

# Decision rule
# - iae_w_delay much smaller than iae: the shift in delay distribution is a major driver.
# - iae_w_delay about equal to iae: delay is not the main driver (or overlap is poor).

Integrated abs error (LICALS weighted on delay): 2.2588000905085948
Improvement: -7.993605777301127e-15


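***Diagnostic delay is not the driver of the transport mismatch — at least not in the way you tested it (reweighting LICALS to match MIROCALS on delay alone).***

**Caveat:** the delay weights printed above are numerically constant (std ≈ 4e-15), so the reweighting left the LICALS curve unchanged. The test therefore cannot, by itself, rule delay in or out.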

**4) Delay + Age + FVC weighting (stronger)**

Repeat the same weighting but with 3 predictors:

In [23]:
# Same odds-weighting recipe as the delay-only step, but the study-membership
# model now sees all three covariates. The list is defined once so the fit and
# predict calls cannot drift apart.
weight_covs = ["Age", "Diagnostic_Delay", "Vital_capacity"]

X3 = comb[weight_covs].values
pipe3 = Pipeline([
    ("scaler", StandardScaler()),
    ("lr", LogisticRegression(max_iter=1000))
])
pipe3.fit(X3, y)

# P(MIRO = 1 | covariates) for LICALS, clipped so the odds stay finite,
# then converted to odds weights and capped at the 99th percentile.
p_miro_lica3 = pipe3.predict_proba(lical0[weight_covs].values)[:, 1]
p_miro_lica3 = np.clip(p_miro_lica3, 1e-4, 1 - 1e-4)
w3 = p_miro_lica3 / (1 - p_miro_lica3)
w3 = np.minimum(w3, np.quantile(w3, 0.99))

# Guard against a silent no-op: constant weights reproduce the unweighted KM.
# A main-effects logistic model yields constant probabilities when every covariate
# has the same mean in both cohorts.
# NOTE(review): this would follow by construction if the covariates were
# standardised separately per study - confirm against the preprocessing step.
if w3.max() - w3.min() <= 1e-8 * w3.mean():
    warnings.warn(
        "Age+Delay+FVC propensity weights are numerically constant, so reweighting "
        "LICALS is a no-op. Check that the covariates are on a common scale in both "
        "cohorts (not standardised separately within each study)."
    )

# Weighted LICALS KM, forward-filled onto the prediction time grid.
km_w3 = KaplanMeierFitter().fit(lical0["Disease_Duration"], lical0["Event"], weights=w3)
S_obs_w3 = km_w3.survival_function_["KM_estimate"].reindex(S_pred_mean.index, method="pad")

# np.trapezoid replaces the deprecated np.trapz and matches the earlier recomputation cell.
iae_w3 = np.trapezoid(np.abs(S_obs_w3.values - S_pred_mean.values), x=S_pred_mean.index.values)
print("Integrated abs error (LICALS weighted on Age+Delay+FVC):", iae_w3)
print("Improvement vs unweighted:", iae - iae_w3)

Integrated abs error (LICALS weighted on Age+Delay+FVC): 2.258800090508576
Improvement vs unweighted: 1.0658141036401503e-14


  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  
  iae_w3 = np.trapz(np.abs(S_obs_w3.values - S_pred_mean.values), x=S_pred_mean.index.values)


Scenario B: Little improvement after delay-only weighting, but big improvement after Age+Delay+FVC weighting

Delay matters, but not alone. Multivariable case-mix shift explains mismatch.

Scenario C: No meaningful improvement even after Age+Delay+FVC weighting

Transport failure is not just case-mix. Likely:

baseline hazard differs by era/care system

missing predictors (e.g., NIV/PEG/riluzole use, enrolment year, site/country)

non-PH / interactions

This “case-mix vs model validity vs baseline hazard” framing is standard in external validation discussions.

These will tell you if your weighting is behaving:

In [None]:
def ess(w):
    """Return the effective sample size of a set of weights.

    Computed as (sum of weights)^2 / (sum of squared weights). For equal
    weights this equals the number of weights.

    Parameters
    ----------
    w : array-like
        Weights; converted to a float NumPy array.
    """
    weights = np.asarray(w, float)
    total = weights.sum()
    sum_of_squares = np.square(weights).sum()
    return (total ** 2) / sum_of_squares

# Diagnostics for the delay-only weights: distribution, effective sample size, largest weight.
delay_weight_summary = pd.Series(w_delay).describe()
n_weights = len(w_delay)

print("Delay-weight summary:\n", delay_weight_summary)
print("ESS:", ess(w_delay), "out of", n_weights)
print("Max weight:", np.max(w_delay))

# Rules of thumb:
# If ESS collapses (e.g., <30-40% of N), overlap is likely limited -> interpret
# "delay explains transport" cautiously.

Delay-weight summary:
 count    1.760000e+02
mean     1.249968e+00
std      4.101519e-15
min      1.249968e+00
25%      1.249968e+00
50%      1.249968e+00
75%      1.249968e+00
max      1.249968e+00
dtype: float64
ESS: 176.0 out of 176
Max weight: 1.2499678779959291


Test whether the issue is baseline hazard shift.

Quick experiment:

Fit a Cox model in LICALS with the MIROCALS linear predictor as a single covariate:

In [26]:
# Step 1: MIROCALS linear predictor for each LICALS patient.
# predict_partial_hazard gives the hazard-ratio scale, so take the log to get
# back to the linear-predictor scale.
lp_lica = cox_dev.predict_partial_hazard(lical0[covs])
lica_lp = lical0.assign(lp=np.log(lp_lica))

# Step 2: recalibration model - a Cox model in LICALS with the linear predictor
# as the only covariate.
recal_cols = ["Disease_Duration", "Event", "lp"]
cox_recal = CoxPHFitter()
cox_recal.fit(
    lica_lp.loc[:, recal_cols],
    duration_col="Disease_Duration",
    event_col="Event",
)

# NOTE(review): the summary's p-value compares the coefficient with 0 (see the
# "cmp to" column); for a calibration slope the reference value of interest is
# presumably 1, so read the confidence interval against 1.
cox_recal.summary

Unnamed: 0_level_0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
covariate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
lp,1.911028,6.760034,0.211558,1.496382,2.325674,4.465504,10.233574,0.0,9.033119,1.668475e-19,62.378104


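***The MIROCALS prognostic effects do not transport to LICALS: the calibration slope on the MIROCALS linear predictor is 1.91 (95% CI 1.50–2.33), well above the value of 1 expected if the effects transported unchanged, so the MIROCALS coefficients understate the strength of the effects in LICALS.
Differences in survival arise from effect-size instability and/or missing effect modifiers rather than from baseline case-mix differences.***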