In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import pandas as pd
!pip install lifelines
from lifelines import WeibullAFTFitter, LogNormalAFTFitter

file_path = "/kaggle/input/lungss/lung.csv"
df = pd.read_csv(file_path)

# Clean column names
df.columns = [c.strip().lower().replace(".", "_") for c in df.columns]

# Prepare variables
df["event"] = (df["status"] == 1).astype(int)
df["sex_male"] = (df["sex"] == 1).astype(int)
df["inst"] = df["inst"].astype(str)
inst_dummies = pd.get_dummies(df["inst"], prefix="inst", drop_first=True)
df2 = pd.concat([df, inst_dummies], axis=1)

# Covariates
covariates = [
    "age",
    "sex_male",
    "ph_ecog",
    "ph_karno",
    "pat_karno",
    "meal_cal",
    "wt_loss",
] + list(inst_dummies.columns)

model_df = df2[["time", "event"] + covariates].dropna() #all null values are dropped


weib = WeibullAFTFitter().fit(model_df, duration_col="time", event_col="event")
logn = LogNormalAFTFitter().fit(model_df, duration_col="time", event_col="event")

# Compute AICs
weib_aic, logn_aic = weib.AIC_, logn.AIC_

# Compute survival at 188 days for specified patient
patient = {c: 0 for c in covariates}
patient.update({
    "age": 68,
    "sex_male": 1,
    "ph_ecog": 1,
    "ph_karno": 80,
    "pat_karno": 85,
    "meal_cal": 1000,
    "wt_loss": 10,
})
patient["inst_3"] = 1

patient_df = pd.DataFrame([patient])[covariates]

t = 188
weib_sf = weib.predict_survival_function(patient_df, times=[t]).iloc[0, 0]
logn_sf = logn.predict_survival_function(patient_df, times=[t]).iloc[0, 0]

weib.print_summary()

results = {
    "Weibull_AIC": weib_aic,
    "LogNormal_AIC": logn_aic,
    "Weibull_Survival_188": weib_sf,
    "LogNormal_Survival_188": logn_sf,
}
results





It's advisable to not trust the variances reported, and to be suspicious of the fitted parameters too.

Some ways to possible ways fix this:

1. Does a particularly large variable need to be centered to 0?
2. Inspect your DataFrame: does everything look as expected? Do you need to add/drop a constant (intercept) column?
3. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables.
4. Trying adding a small penalizer (or changing it, if already present). Example: `WeibullAFTFitter(penalizer=0.01).fit(...)`.
5. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence.


It's advisable to not trust the variances reported, and to be suspicious of the fitted parameters too.

Some ways to possible ways fix this:

1. Does a particularly large variable need to be centered to 0?
2. Inspect your DataFrame: does everything look as expected? Do you need to add/drop a constant (intercept) column?
3. 

0,1
model,lifelines.WeibullAFTFitter
duration col,'time'
event col,'event'
number of observations,168
number of events observed,47
log-likelihood,-344.24
time fit was run,2025-10-20 09:46:00 UTC

Unnamed: 0,Unnamed: 1,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
lambda_,age,0.01,1.01,0.01,-0.01,0.03,0.99,1.03,0.0,0.97,0.33,1.59
lambda_,inst_10.0,225.43,7.980000000000001e+97,0.0,225.43,225.43,7.980000000000001e+97,7.980000000000001e+97,0.0,4722100911505220.0,<0.005,inf
lambda_,inst_11.0,-0.46,0.63,0.29,-1.02,0.1,0.36,1.11,0.0,-1.6,0.11,3.2
lambda_,inst_12.0,-0.08,0.93,0.3,-0.66,0.51,0.52,1.66,0.0,-0.26,0.79,0.33
lambda_,inst_13.0,-0.1,0.91,0.26,-0.61,0.42,0.54,1.52,0.0,-0.37,0.71,0.48
lambda_,inst_15.0,-0.02,0.98,0.39,-0.78,0.75,0.46,2.11,0.0,-0.04,0.97,0.05
lambda_,inst_16.0,-0.04,0.96,0.31,-0.64,0.56,0.53,1.76,0.0,-0.12,0.90,0.15
lambda_,inst_2.0,-0.29,0.75,0.49,-1.25,0.67,0.29,1.95,0.0,-0.6,0.55,0.86
lambda_,inst_21.0,183.39,4.42e+79,0.0,183.39,183.39,4.42e+79,4.42e+79,0.0,1.7394499331426736e+16,<0.005,inf
lambda_,inst_22.0,0.23,1.26,0.33,-0.41,0.88,0.66,2.4,0.0,0.7,0.48,1.05

0,1
Concordance,0.71
AIC,742.48
log-likelihood ratio test,32.77 on 25 df
-log2(p) of ll-ratio test,2.87


{'Weibull_AIC': 742.4835209906133,
 'LogNormal_AIC': 736.5097037977712,
 'Weibull_Survival_188': 0.9684589812703487,
 'LogNormal_Survival_188': 0.9873319227095851}