In [1]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)


In [2]:
import sys
import glob, os

# remove warnings
import warnings
warnings.filterwarnings('ignore')

from pyprojroot import here

# Add the project root to sys.path
sys.path.append(str(here()))

from hspc.config import get_config
config = get_config()

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesClassifier

from collections import Counter

from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.model_selection import cross_val_score

In [5]:

from tensorflow import keras
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
import numpy as np

In [6]:
import random
def set_seed(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    
set_seed(42)

In [7]:
from tabpfn import TabPFNClassifier as tbp
import shapiq

# Load Data
Here we will load the data from IU and CU (Piccirillo)

In [8]:
dataset = pd.read_csv(here() / config.data_interim / 'd_p_h_combined_unnormalized.csv')
dataset

Unnamed: 0,Gender,Age,WBC,Hgb,Hct,Platelet Ct,Neut Pct,Lymphocyte Abs,Mono Pct,Lymph Pct,Eos Pct,Mono Abs,Neut Abs,Eos Abs,Day 1 CD34 Absolute per uL,Count,lab
0,1,39,11.2,14.6,43.5,328.0,64.5,2.9,7.4,25.5,1.9,0.8,7.2,0.2,100.0,1,Dipersio
1,0,57,6.5,14.5,42.9,280.0,48.1,2.7,5.9,41.1,4.0,0.4,3.1,0.3,71.0,1,Dipersio
2,1,43,4.4,15.0,43.9,299.0,58.0,1.3,10.0,28.7,2.7,0.4,2.6,0.1,87.0,1,Dipersio
3,0,50,6.7,14.2,40.6,243.0,56.4,2.0,7.7,29.8,4.8,0.5,3.8,0.3,27.0,0,Dipersio
4,0,32,9.8,13.3,39.2,373.0,65.1,2.6,6.4,26.5,1.7,0.6,6.4,0.2,31.0,0,Dipersio
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1155,1,23,31.3,13.5,39.4,257.0,76.0,2.8,8.0,9.0,1.0,2.5,23.8,0.3,58.0,1,hurwitz
1156,1,56,37.0,15.4,44.9,233.0,77.0,4.4,7.0,12.0,1.0,2.6,28.5,0.4,59.0,1,hurwitz
1157,0,25,51.9,13.8,40.7,265.0,64.0,7.3,9.0,14.0,1.0,4.7,33.2,0.5,56.0,1,hurwitz
1158,1,22,51.0,14.6,42.9,250.0,89.0,4.1,2.0,8.0,1.0,1.0,45.3,0.5,174.0,1,hurwitz


In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, optimizers, losses, callbacks
import numpy as np


In [10]:
class GradientReversal(layers.Layer):
    """
    λ-scaled Gradient Reversal Layer.
    In the forward pass it is the identity;
    in the backward pass it multiplies upstream gradients by –λ.
    """
    def __init__(self, lambd=1.0, **kwargs):
        super().__init__(**kwargs)
        self.lambd = lambd

    @tf.custom_gradient
    def _reverse(self, x):
        def grad(dy):
            return -self.lambd * dy         # flip & scale gradients
        return x, grad

    def call(self, inputs):
        return self._reverse(inputs)


In [11]:
def build_dann(input_dim: int,
               enc_units=(256, 128, 64),
               drop_rate=0.3,
               l2=5e-4,
               grl_lambda=1.0):
    """
    input_dim : number of CBC & demographic features
    enc_units : tuple with hidden units of the shared encoder
    """
    # ----- Shared encoder ---------------------------------------------------
    x_in   = layers.Input(shape=(input_dim,), name="features")
    x      = x_in
    for u in enc_units:
        x = layers.Dense(u,
                         activation="relu",
                         kernel_regularizer=regularizers.l2(l2))(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(drop_rate)(x)
    shared = layers.BatchNormalization(name="shared_repr")(x)

    # ----- Mobilization-outcome head ----------------------------------------
    out = layers.Dense(1, activation="sigmoid", name="mobilization")(shared)

    # ----- Domain (time-point) head w/ GRL ----------------------------------
    grl = GradientReversal(lambd=grl_lambda, name="grl")(shared)
    dom = layers.Dense(64, activation="relu")(grl)
    dom = layers.Dense(1, activation="sigmoid", name="domain")(dom)

    model = models.Model(inputs=x_in, outputs=[out, dom], name="DANN_mobilizer")
    return model


In [12]:
# example shapes ------------------------------------------------------------
X      = np.random.rand(10_000, 16)   # 16 lab + demographic features
y_out  = np.random.randint(0, 2, size=(10_000, 1))   # good / poor
y_dom  = np.random.randint(0, 2, size=(10_000, 1))   # pre / post
# ---------------------------------------------------------------------------

model = build_dann(input_dim=X.shape[1], grl_lambda=1.0)

losses_dict   = {
    "mobilization": losses.BinaryCrossentropy(from_logits=False),
    "domain"      : losses.BinaryCrossentropy(from_logits=False)
}
# weight domain loss a bit lower if you don’t want it to dominate
loss_weights  = {"mobilization": 1.0, "domain": 1.0}

model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss=losses_dict,
    loss_weights=loss_weights,
    metrics={"mobilization": ["accuracy", tf.keras.metrics.AUC(name="AUC")],
             "domain"      : ["accuracy"]}
)

es = callbacks.EarlyStopping(
    monitor="val_mobilization_AUC", mode="max",
    patience=8, restore_best_weights=True, verbose=1)

hist = model.fit(
    X, {"mobilization": y_out, "domain": y_dom},
    validation_split=0.2,
    epochs=200,
    batch_size=64,
    callbacks=[es],
    verbose=2,
    shuffle=True)



Epoch 1/200
125/125 - 4s - 28ms/step - domain_accuracy: 0.4989 - domain_loss: 0.7877 - loss: 1.8097 - mobilization_AUC: 0.4953 - mobilization_accuracy: 0.4961 - mobilization_loss: 0.8797 - val_domain_accuracy: 0.5120 - val_domain_loss: 0.6942 - val_loss: 1.5325 - val_mobilization_AUC: 0.5132 - val_mobilization_accuracy: 0.4975 - val_mobilization_loss: 0.6964
Epoch 2/200
125/125 - 0s - 3ms/step - domain_accuracy: 0.4930 - domain_loss: 0.7657 - loss: 1.7450 - mobilization_AUC: 0.5037 - mobilization_accuracy: 0.5056 - mobilization_loss: 0.8384 - val_domain_accuracy: 0.4845 - val_domain_loss: 0.6962 - val_loss: 1.5364 - val_mobilization_AUC: 0.5090 - val_mobilization_accuracy: 0.5070 - val_mobilization_loss: 0.6996
Epoch 3/200
125/125 - 0s - 4ms/step - domain_accuracy: 0.5006 - domain_loss: 0.7461 - loss: 1.7057 - mobilization_AUC: 0.5099 - mobilization_accuracy: 0.5090 - mobilization_loss: 0.8197 - val_domain_accuracy: 0.5035 - val_domain_loss: 0.7003 - val_loss: 1.5496 - val_mobilizatio

In [13]:
mobilizer_score = model.predict(X_new)[0]     # first element of list


NameError: name 'X_new' is not defined