In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler


In [3]:
import tensorflow as tf

# Report the installed TensorFlow build and how many GPUs it can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(gpu_devices))

TensorFlow version: 2.16.2
Num GPUs Available:  1


In [36]:
# List every physical device (CPU and accelerators) TensorFlow can see, one per line.
devices = tf.config.list_physical_devices()
for device in devices:
    print(device)

PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [6]:
# Load the preprocessed training split ("nondiff" variant) into a DataFrame.
df = pd.read_csv("preprocessed_dataset/preprocessed_train_nondiff.csv")

In [7]:
# Encode SEX as an integer flag: 'M' -> 1, 'F' -> 0. Series.map turns any value
# that is not a key of the dict into NaN, so re-running this cell on the already
# encoded column would blank it out — reload df before running it again.
df['SEX'] = df['SEX'].map({'M': 1, 'F': 0})

In [8]:
# Rescale AGE with a min-max scaler whose bounds are learned from the training frame.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(df[['AGE']])
df['AGE'] = min_max_scaler.transform(df[['AGE']])

In [9]:
import json

In [10]:
def obtain_evidences(path="huggingface_dataset/ddxplus/release_evidences.json"):
    """Build the flat list of evidence feature names from the evidences JSON.

    The JSON maps each evidence code to a record. An evidence with a non-empty
    ``possible-values`` list contributes one name per value, written
    ``<code>_@_<value>``; an evidence with no possible values (empty, null or
    missing) contributes its bare code.

    Args:
        path: Location of the evidences JSON file. Defaults to the path this
            notebook has always read.

    Returns:
        list[str]: Feature names, in the order the evidences appear in the file.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        release_evidences = json.load(f)
    set_evidences = []
    for key, value in release_evidences.items():
        # Treat a missing or null 'possible-values' like an empty list.
        possible_values = value.get('possible-values') or []
        if possible_values:
            set_evidences.extend(f'{key}_@_{val}' for val in possible_values)
        else:
            # JSON object keys are always strings, so the code can be used as is.
            set_evidences.append(key)
    return set_evidences

In [11]:
def obtain_conditions(path="huggingface_dataset/ddxplus/release_conditions.json"):
    """Return the condition names, i.e. the top-level keys of the conditions JSON.

    Args:
        path: Location of the conditions JSON file. Defaults to the path this
            notebook has always read.

    Returns:
        list[str]: Condition names, in the order they appear in the file.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        release_conditions = json.load(f)
    # Iterating a dict yields its keys in insertion (file) order.
    return list(release_conditions)

In [12]:
# Model inputs: the two demographic columns followed by every evidence column.
features = ['AGE', 'SEX', *obtain_evidences()]
# Model targets: one column per condition.
labels = obtain_conditions()

In [13]:
# Feature matrix as a NumPy array, columns ordered as in `features`.
X_train = df[features].values

In [14]:
# Target matrix as a NumPy array, one column per entry of `labels`.
Y_train = df[labels].values

In [15]:
# Show (rows, columns) for the feature matrix, then for the label matrix.
for matrix in (X_train, Y_train):
    print(matrix.shape)

(134529, 974)
(134529, 49)


In [16]:
from tensorflow.keras import layers, models
from tensorflow.keras import regularizers

In [17]:
# Print the name of every GPU TensorFlow can see.
# The message text is Romanian for "GPU found".
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print(f"GPU găsit: {gpu.name}")

GPU găsit: /physical_device:GPU:0


In [16]:
def elementwise_accuracy(y_true, y_pred, threshold=0.03):
    """Fraction of rows in which every prediction lies within ``threshold`` of its target.

    Args:
        y_true: Tensor of target values; rows are reduced along axis 1.
        y_pred: Tensor of predictions, subtracted element-wise from ``y_true``.
        threshold: Largest absolute difference still counted as a match.

    Returns:
        Scalar float32 tensor: mean over rows of the all-elements-match flag.
    """
    within_tolerance = tf.abs(y_true - y_pred) <= threshold
    # A row counts only when all of its elements are within tolerance.
    row_matches = tf.cast(tf.reduce_all(within_tolerance, axis=1), tf.float32)
    return tf.reduce_mean(row_matches)

In [17]:
import tensorflow as tf

def top3_tolerant_accuracy(y_true, y_pred, k=3, tolerance=0.05001):
    """Share of rows whose ``k`` highest-scored classes all match the target within ``tolerance``.

    For every row, the ``k`` columns with the largest predicted values are
    selected; the row counts as correct only if ``|y_true - y_pred| <= tolerance``
    holds at each of those columns.

    Args:
        y_true: 2-D tensor of target values, one row per sample.
        y_pred: 2-D tensor of predictions with the same shape as ``y_true``.
        k: Number of top-ranked predictions inspected per row. Defaults to 3,
            the value that used to be hard-coded.
        tolerance: Largest absolute difference still counted as a match.
            Defaults to the previously hard-coded 0.05001 (presumably chosen
            just above 0.05 so that a float32 difference of 0.05 still passes
            — confirm).

    Returns:
        Scalar float32 tensor: fraction of rows that satisfy the criterion.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align dtypes so the subtraction below also works when the targets arrive
    # as integers or float64.
    y_true = tf.cast(y_true, y_pred.dtype)

    # Column indices of the k largest predictions in each row, shape (batch, k).
    topk_indices = tf.argsort(y_pred, direction='DESCENDING')[:, :k]

    # batch_dims=1 pairs row i of the indices with row i of the values, which
    # replaces building an explicit (row, column) index grid for gather_nd.
    topk_true = tf.gather(y_true, topk_indices, batch_dims=1)
    topk_pred = tf.gather(y_pred, topk_indices, batch_dims=1)

    correct = tf.reduce_all(tf.abs(topk_true - topk_pred) <= tolerance, axis=1)
    return tf.reduce_mean(tf.cast(correct, tf.float32))

In [18]:
# Hand-made sanity check for top3_tolerant_accuracy on two rows of five classes.
# Row 0: the largest prediction (0.5) misses its target (0.6) by 0.1, so the row fails.
# Row 1: the three largest predictions are within tolerance of their targets, so it passes.
y_true = tf.constant([
    [0.0, 0.2, 0.6, 0.1, 0.1],
    [0.7, 0.1, 0.1, 0.05, 0.05]
], dtype=tf.float32)

y_pred = tf.constant([
    [0.05, 0.22, 0.5, 0.08, 0.07],
    [0.75, 0.08, 0.08, 0.04, 0.05]
], dtype=tf.float32)

# One of the two rows passes, so the printed value is 0.5.
acc = top3_tolerant_accuracy(y_true, y_pred)
print(acc)


tf.Tensor(0.5, shape=(), dtype=float32)


2025-04-07 17:57:36.750198: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-04-07 17:57:36.750258: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2025-04-07 17:57:36.750267: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2025-04-07 17:57:36.750302: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-07 17:57:36.750324: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [18]:
# Network dimensions follow the feature and label lists.
input_dim = len(features)
output_dim = len(labels)

# An earlier variant (512-256-128 dense layers with L2(0.01) kernel
# regularization and dropout) was tried before this architecture.

# Hidden layers (plain DNN): four dense stages of decreasing width, each
# followed by batch normalization and 30% dropout.
hidden_stack = []
for width in (256, 128, 64, 32):
    hidden_stack += [
        layers.Dense(width, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
    ]

model = models.Sequential([
    layers.Input(shape=(input_dim,), name='Input'),
    *hidden_stack,
    # Output layer: one softmax unit per label.
    layers.Dense(output_dim, activation='softmax', name='Output'),
])

2025-04-07 18:39:05.963767: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-04-07 18:39:05.963804: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2025-04-07 18:39:05.963811: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2025-04-07 18:39:05.963831: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-07 18:39:05.963849: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [24]:
def combined_loss(y_true, y_pred):
    """Blend the top-3 tolerant error rate with a small KL-divergence term.

    Returns ``0.999 * (1 - top3_tolerant_accuracy) + 0.0001 * KLDivergence``.

    NOTE(review): ``top3_tolerant_accuracy`` is built from comparisons and a
    boolean reduction, so its term is piecewise constant in ``y_pred``; only
    the KL term can contribute a gradient. The two weights also sum to 0.9991
    rather than 1 — confirm both are intended.
    """
    kl_fn = tf.keras.losses.KLDivergence()
    divergence = kl_fn(y_true, y_pred)
    top3_error = 1.0 - top3_tolerant_accuracy(y_true, y_pred)
    return 0.999 * top3_error + 0.0001 * divergence

In [19]:
from tensorflow.keras.losses import BinaryCrossentropy


In [20]:
# NOTE(review): the output layer is a softmax, yet the loss is binary
# cross-entropy; categorical cross-entropy is the usual pairing — confirm this
# combination is intended.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss=BinaryCrossentropy(), metrics=['accuracy'])

In [23]:
# Train on the full training matrices: 10 passes, mini-batches of 32 rows.
# No validation data is passed, so only training metrics are reported.
model.fit(x=X_train, y=Y_train, batch_size=32, epochs=10)

Epoch 1/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8208 - loss: 0.0391
Epoch 2/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8413 - loss: 0.0369
Epoch 3/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8529 - loss: 0.0352
Epoch 4/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8632 - loss: 0.0338
Epoch 5/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1163s[0m 277ms/step - accuracy: 0.8722 - loss: 0.0328
Epoch 6/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8809 - loss: 0.0318
Epoch 7/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 19ms/step - accuracy: 0.8880 - loss: 0.0311
Epoch 8/10
[1m4205/4205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 19ms/step - accuracy: 0.8917 - loss: 0.0302
Epoch

<keras.src.callbacks.history.History at 0x362261a60>

In [25]:
# Display (rows, columns) of the training feature matrix.
X_train.shape

(134529, 974)

In [26]:
# Load the preprocessed test split ("nondiff" variant) into a DataFrame.
df_test = pd.read_csv("preprocessed_dataset/preprocessed_test_nondiff.csv")

In [27]:
# Encode SEX as an integer flag: 'M' -> 1, 'F' -> 0. Series.map turns any value
# that is not a key of the dict into NaN, so re-running this cell on the already
# encoded column would blank it out — reload df_test before running it again.
df_test['SEX'] = df_test['SEX'].map({'M': 1, 'F': 0})

In [30]:
# Scale the test AGE column with the scaler that was fitted on the training
# data (`min_max_scaler`), so that a given age maps to the same value in both
# splits. Refitting on the test set would learn different min/max bounds and
# feed the model inputs on a different scale than it was trained on.
# Scaled test ages may fall slightly outside [0, 1] if the test range is wider.
# Run this once per load of df_test: transforming twice would rescale again.
df_test['AGE'] = min_max_scaler.transform(df_test[['AGE']])

In [31]:
# Test matrices use the same column selections (and order) as the training ones.
X_test, Y_test = (df_test[columns].values for columns in (features, labels))

In [32]:
# Model output for every test row: one softmax score per label column.
y_pred_proba = model.predict(X_test)

# Reduce scores and target rows to a single class index each (position of the
# row maximum). NOTE: this rebinds the y_true / y_pred names that the earlier
# metric sanity check used for tensors.
y_pred = y_pred_proba.argmax(axis=1)
y_true = Y_test.argmax(axis=1)

[1m8409/8409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2ms/step


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Weighted-average classification metrics on the test split.
# zero_division=0 spells out the value scikit-learn already substitutes when a
# class has no predicted (or no true) samples, so the scores are unchanged and
# the undefined-metric warning is no longer emitted.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, **weighted_kwargs)
precision = precision_score(y_true, y_pred, **weighted_kwargs)
recall = recall_score(y_true, y_pred, **weighted_kwargs)

print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Accuracy: 0.9783
F1-score: 0.9720
Precision: 0.9833
Recall: 0.9783


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
# Persist the trained model to a .keras file under saved_models/.
model.save("saved_models/neural_network_98_accuracy.keras")

In [8]:
import os
import json
# Show the working directory contents as a quick check that the relative path below resolves.
print(os.listdir())

# Load the evidence metadata (evidence code -> record). Explicit UTF-8 so the
# question texts decode the same way regardless of the platform's default encoding.
with open("huggingface_dataset/ddxplus/release_evidences.json", "r", encoding="utf-8") as f:
    code_and_question = json.load(f)

['.DS_Store', 'pathology_model.ipynb', 'preprocess_data.ipynb', 'huggingface_dataset', 'saved_models', 'plug_and_play_nn.ipynb', 'preprocessed_dataset']


In [22]:
# Print each evidence code next to its English question text.
for key, value in code_and_question.items():
    print(f"{key} : {value['question_en']}")

E_91 : Do you have a fever (either felt or measured with a thermometer)?
E_55 : Do you feel pain somewhere?
E_53 : Do you have pain somewhere, related to your reason for consulting?
E_57 : Does the pain radiate to another location?
E_54 : Characterize your pain:
E_59 : How fast did the pain appear?
E_56 : How intense is the pain?
E_58 : How precisely is the pain located?
E_159 : Did you lose consciousness?
E_133 : Where is the affected region located?
E_129 : Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?
E_130 : What color is the rash?
E_134 : How intense is the pain caused by the rash?
E_132 : Is the rash swollen?
E_136 : How severe is the itching?
E_135 : Is the lesion (or are the lesions) larger than 1cm?
E_131 : Do your lesions peel off?
E_154 : Is your skin much paler than usual?
E_155 : Do you feel your heart is beating fast (racing), irregularly (missing a beat) or do you feel palpitations?
E_210 :