In [None]:
import numpy as np
import pandas as pd
from keras import layers, models, Input
from keras.callbacks import EarlyStopping, LambdaCallback
import os
import io
import tensorflow as tf

In [36]:
# https://www.kaggle.com/datasets/olcaybolat1/dermatology-dataset-classification

df = pd.read_csv("clinical_data.csv")

In [37]:
# If the age column is unfilled, replace it with the mean age

df['age'] = df['age'].replace('?', np.nan).astype(float)

mean_age = int(df['age'].mean())
df['age'] = df['age'].fillna(mean_age).astype(int)

In [38]:
(df['age'] == '?').any()

np.False_

In [39]:
train=df.sample(frac=0.8,random_state=200)
test=df.drop(train.index)

In [None]:
num_classes = 6

model = models.Sequential([
		Input(shape=(12,)),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [41]:
feature_cols = [
    'erythema', 'scaling', 'definite_borders', 'itching',
    'koebner_phenomenon', 'polygonal_papules', 'follicular_papules',
    'oral_mucosal_involvement', 'knee_and_elbow_involvement', 'scalp_involvement',
    'family_history', 'age'
]

In [42]:
# Prepare training data
X_train = train[feature_cols].astype('float32').values
y_train_raw = train['class'].astype(int).values
num_classes = len(np.unique(y_train_raw))
y_train = tf.keras.utils.to_categorical(y_train_raw - 1, num_classes)

# Prepare test data
X_test = test[feature_cols].astype('float32').values
y_test_raw = test['class'].astype(int).values
y_test = tf.keras.utils.to_categorical(y_test_raw - 1, num_classes)

early_stop = EarlyStopping(
  monitor='val_loss',        # watch validation loss
  patience=25,                # wait this many epochs with no improvement
  restore_best_weights=True  # roll back to the best epoch’s weights
)

num_checkpoints = len(os.listdir("checkpoint"))

save_final = LambdaCallback(
    on_train_end=lambda logs: model.save_weights(
        f"checkpoint/cp-{num_checkpoints + 1:01d}.weights.h5"
    )
)

model.fit(
	X_train, y_train,
	epochs=1000,
	validation_data=(X_test, y_test),
	shuffle=True,
	callbacks=[early_stop, save_final]
)

Epoch 1/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2034 - loss: 3.1490 - val_accuracy: 0.4110 - val_loss: 1.5876
Epoch 2/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2694 - loss: 2.3220 - val_accuracy: 0.4658 - val_loss: 1.4742
Epoch 3/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2552 - loss: 2.1389 - val_accuracy: 0.4658 - val_loss: 1.4989
Epoch 4/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2898 - loss: 1.9007 - val_accuracy: 0.4932 - val_loss: 1.4856
Epoch 5/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3246 - loss: 1.8342 - val_accuracy: 0.4932 - val_loss: 1.4876
Epoch 6/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2220 - loss: 1.8620 - val_accuracy: 0.4795 - val_loss: 1.4823
Epoch 7/1000
[1m10/10[0m 

<keras.src.callbacks.history.History at 0x16a33f340>

In [43]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
training_result = f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}"
print(training_result)


Test Loss: 0.2560, Test Accuracy: 0.8767


In [None]:
X_all = df[feature_cols].astype('float32').values

probs = model.predict(X_all, verbose=0) # shape (n_samples, 12)

# 4. Derive predicted class and confidence
pred_classes = np.argmax(probs, axis=1) + 1 
confidences = np.max(probs, axis=1)

# 5. True labels
true_classes = df['class'].astype(int).values

# 6. Compute biopsy_needed: 0 if model is ≥80% confident AND correct; else 1
df['biopsy_needed'] = (
	~(
		(pred_classes == true_classes) &
		(confidences >= 0.8)
	)
).astype(int)

num_biopsy_needed = (df["biopsy_needed"] == 1).sum()
num_biopsy_not_needed = (df["biopsy_needed"] == 0).sum()

biopsy_result = f"Biopsies needed: {num_biopsy_needed}, Not needed: {num_biopsy_not_needed}, {num_biopsy_not_needed / (num_biopsy_needed + num_biopsy_not_needed) * 100:.2f}% not needed"
print(biopsy_result)
buf = io.StringIO()

model.summary(
	print_fn=lambda s: buf.write(s + "\n"),
	line_length=40,
)

summary_str = buf.getvalue()

with open(f"data/results-{num_checkpoints + 1:01d}.txt", "a") as f:
	f.write(training_result)
	f.write("\n")
	f.write(summary_str)
	f.write("\n")
	f.write(biopsy_result)


Biopsies needed: 105, Not needed: 261, 71.31% not needed


In [45]:
model.save("dermatology_model.keras")
model.save_weights("dermatology_model.weights.h5")