# DeepHit: A Deep Learning Approach to Survival Analysis with Competing Risks

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

from src.model import DeepHit 
from src.tools import preprocess_metabrics, prepare_deephit_data

In [2]:
# === 1. Load & preprocess METABRIC ===
df = pd.read_csv("data/METABRIC_RNA_Mutation.csv")  # example filename
df = preprocess_metabrics(df)
data = prepare_deephit_data(df, num_bins=100)

train = data["train"]
test = data["test"]
meta = data["meta"]

# === 2. Initialize model ===
network_settings = {
    "h_dim_shared": 64,
    "h_dim_CS": 32,
    "num_layers_shared": 2,
    "num_layers_CS": 2,
    "active_fn": "relu",
    "keep_prob": 0.8,
}

model = DeepHit(meta, network_settings)
optimizer = Adam(learning_rate=1e-3)

# === 3. Convert NumPy arrays → TensorFlow tensors ===
x_train = tf.convert_to_tensor(train["x"], dtype=tf.float32)
t_train = tf.convert_to_tensor(train["t"], dtype=tf.int32)
e_train = tf.convert_to_tensor(train["e"], dtype=tf.int32)
m1_train = tf.convert_to_tensor(train["m1"], dtype=tf.float32)
m2_train = tf.convert_to_tensor(train["m2"], dtype=tf.float32)

x_test = tf.convert_to_tensor(test["x"], dtype=tf.float32)
t_test = tf.convert_to_tensor(test["t"], dtype=tf.int32)
e_test = tf.convert_to_tensor(test["e"], dtype=tf.int32)
m1_test = tf.convert_to_tensor(test["m1"], dtype=tf.float32)
m2_test = tf.convert_to_tensor(test["m2"], dtype=tf.float32)

# === 4. Train loop ===
alpha, beta, gamma = 1.0, 1.0, 0.0  # you can tune these
epochs = 50
batch_size = 128
num_samples = x_train.shape[0]

for epoch in range(epochs):
    # Shuffle training data
    idx = tf.random.shuffle(tf.range(num_samples))
    x_train = tf.gather(x_train, idx)
    t_train = tf.gather(t_train, idx)
    e_train = tf.gather(e_train, idx)
    m1_train = tf.gather(m1_train, idx)
    m2_train = tf.gather(m2_train, idx)

    # Mini-batch training
    for i in range(0, num_samples, batch_size):
        xb = x_train[i : i + batch_size]
        tb = t_train[i : i + batch_size]
        eb = e_train[i : i + batch_size]
        m1b = m1_train[i : i + batch_size]
        m2b = m2_train[i : i + batch_size]

        with tf.GradientTape() as tape:
            loss, _ = model.compute_loss(xb, eb, tb, m1b, m2b, alpha, beta, gamma)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    print(f"Epoch {epoch+1}/{epochs} | Loss: {loss.numpy():.4f}")

# === 5. Inference ===
out_test = model(x_test, training=False)
print("Output shape:", out_test.shape)

  df = pd.read_csv("data/METABRIC_RNA_Mutation.csv")  # example filename


Epoch 1/50 | Loss: 25.6909
Epoch 2/50 | Loss: 22.5683
Epoch 3/50 | Loss: 19.2246
Epoch 4/50 | Loss: 13.5191
Epoch 5/50 | Loss: 14.1546
Epoch 6/50 | Loss: 14.6311
Epoch 7/50 | Loss: 14.8692
Epoch 8/50 | Loss: 10.7495
Epoch 9/50 | Loss: 12.5307
Epoch 10/50 | Loss: 10.3468
Epoch 11/50 | Loss: 7.7822
Epoch 12/50 | Loss: 11.3699
Epoch 13/50 | Loss: 11.4499
Epoch 14/50 | Loss: 8.9133
Epoch 15/50 | Loss: 11.0006
Epoch 16/50 | Loss: 8.9140
Epoch 17/50 | Loss: 10.0702
Epoch 18/50 | Loss: 10.8099
Epoch 19/50 | Loss: 9.1782
Epoch 20/50 | Loss: 10.6312
Epoch 21/50 | Loss: 11.0518
Epoch 22/50 | Loss: 8.1762
Epoch 23/50 | Loss: 7.7969
Epoch 24/50 | Loss: 9.3407
Epoch 25/50 | Loss: 10.1495
Epoch 26/50 | Loss: 10.6457
Epoch 27/50 | Loss: 9.2338
Epoch 28/50 | Loss: 8.1458
Epoch 29/50 | Loss: 8.9470
Epoch 30/50 | Loss: 9.2013
Epoch 31/50 | Loss: 6.0720
Epoch 32/50 | Loss: 6.8569
Epoch 33/50 | Loss: 8.5290
Epoch 34/50 | Loss: 7.3412
Epoch 35/50 | Loss: 9.1215
Epoch 36/50 | Loss: 7.1654
Epoch 37/50 | Loss