In [None]:
# @title Importing Modules

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix

In [None]:
# @title Generating Simulation Dataset

# Creating simulation dataset with features and labels

# Number of synthetic patients to generate
N_PATIENTS = 100000

# Treatment-history percentages that apply while a patient is below the age
# band's 'advanced_from' stage: L-Dopa alone, DA alone, and L-Dopa + DA together.
BASE_LEVO_PCT = 70.3
BASE_DA_PCT = 15.5
BASE_LEVO_DA_PCT = 14.2

# One entry per age band, ordered by ascending 'a_upper'.
#   a_upper           exclusive upper bound of the uniform draw `a` in [0, 100)
#                     that selects the band (0.4 -> 0.4% of patients, etc.)
#   age_low           youngest age in the band; ages span age_low .. age_low + 9
#   male_pct          percentage of male patients
#   early_pct         percentage of patients whose stage is drawn from
#                     min_stage .. 3; the rest get stage 4 .. max_stage
#   contra_*_pct      percentage with contraindications, by sex
#   advanced_from     first PD stage that uses the adv_* percentages
#   adv_*_pct         percentage previously treated with each therapy at or
#                     above 'advanced_from'
AGE_GROUPS = [
  # Age 40-49
  {'a_upper': 0.4, 'age_low': 40, 'male_pct': 65.6,
   'early_pct': 96, 'min_stage': 1, 'max_stage': 4,
   'contra_male_pct': 13.0, 'contra_female_pct': 19.4, 'advanced_from': 3,
   'adv_levo_pct': 63.4, 'adv_da_pct': 29.7, 'adv_dbs_pct': 10.0, 'adv_duopa_pct': 5.0},
  # Age 50-59
  {'a_upper': 2.3, 'age_low': 50, 'male_pct': 62.1,
   'early_pct': 91, 'min_stage': 1, 'max_stage': 4,
   'contra_male_pct': 16.0, 'contra_female_pct': 22.4, 'advanced_from': 3,
   'adv_levo_pct': 69.6, 'adv_da_pct': 29.7, 'adv_dbs_pct': 30.0, 'adv_duopa_pct': 7.5},
  # Age 60-69
  {'a_upper': 9.2, 'age_low': 60, 'male_pct': 60.0,
   'early_pct': 89, 'min_stage': 1, 'max_stage': 5,
   'contra_male_pct': 19.0, 'contra_female_pct': 25.4, 'advanced_from': 3,
   'adv_levo_pct': 75.8, 'adv_da_pct': 29.7, 'adv_dbs_pct': 30.0, 'adv_duopa_pct': 10.0},
  # Age 70-79
  {'a_upper': 31.3, 'age_low': 70, 'male_pct': 59.0,
   'early_pct': 83, 'min_stage': 1, 'max_stage': 5,
   'contra_male_pct': 19.0, 'contra_female_pct': 25.4, 'advanced_from': 3,
   'adv_levo_pct': 82.0, 'adv_da_pct': 24.7, 'adv_dbs_pct': 24.0, 'adv_duopa_pct': 10.0},
  # Age 80-89
  {'a_upper': 67.8, 'age_low': 80, 'male_pct': 59.7,
   'early_pct': 77, 'min_stage': 2, 'max_stage': 5,
   'contra_male_pct': 22.0, 'contra_female_pct': 28.4, 'advanced_from': 4,
   'adv_levo_pct': 82.0, 'adv_da_pct': 19.7, 'adv_dbs_pct': 22.0, 'adv_duopa_pct': 7.5},
  # Age 90-99
  {'a_upper': 100, 'age_low': 90, 'male_pct': 53.9,
   'early_pct': 74, 'min_stage': 2, 'max_stage': 5,
   'contra_male_pct': 25.0, 'contra_female_pct': 31.4, 'advanced_from': 4,
   'adv_levo_pct': 81.0, 'adv_da_pct': 14.7, 'adv_dbs_pct': 20.0, 'adv_duopa_pct': 5.0},
]


def simulate_patient():
  """Draw the features of one synthetic patient from the global NumPy RNG.

  Returns:
    Tuple (age, sex, pdStage, contra, levoF, dopAgoF, dbsF, duopaF). `sex` is
    1 for male and 0 for female; `contra` and the four treatment-history flags
    are 1 for yes and 0 for no.
  """
  # The uniform draws are taken up front and in a fixed order so that the
  # sequence of np.random calls (and therefore the data for a given seed)
  # does not depend on which branch a patient falls into.
  a = np.random.random() * 100
  s = np.random.random() * 100
  p = np.random.randint(100)
  c = np.random.random() * 100
  le = np.random.random() * 100
  leDA = np.random.random() * 100
  da = np.random.random() * 100
  db = np.random.random() * 100
  du = np.random.random() * 100

  # `a` lies in [0, 100) and the last band ends at 100, so a band always matches
  group = next(band for band in AGE_GROUPS if a < band['a_upper'])

  age = np.random.randint(low=group['age_low'], high=group['age_low'] + 10)
  sex = 1 if s < group['male_pct'] else 0

  if p < group['early_pct']:
    pdStage = np.random.randint(low=group['min_stage'], high=4)
  elif group['max_stage'] == 4:
    # Single possible late stage: assigned without consuming a random draw
    pdStage = 4
  else:
    pdStage = np.random.randint(low=4, high=group['max_stage'] + 1)

  contra_pct = group['contra_male_pct'] if sex == 1 else group['contra_female_pct']
  contra = 1 if c < contra_pct else 0

  # Every history flag starts from 0 for each patient
  levoF = dopAgoF = dbsF = duopaF = 0
  if pdStage < group['advanced_from']:
    on_both = leDA < BASE_LEVO_DA_PCT
    levoF = 1 if (le < BASE_LEVO_PCT or on_both) else 0
    dopAgoF = 1 if (da < BASE_DA_PCT or on_both) else 0
  else:
    levoF = 1 if le < group['adv_levo_pct'] else 0
    dopAgoF = 1 if da < group['adv_da_pct'] else 0
    dbsF = 1 if db < group['adv_dbs_pct'] else 0
    duopaF = 1 if du < group['adv_duopa_pct'] else 0

  return age, sex, pdStage, contra, levoF, dopAgoF, dbsF, duopaF


def recommend_treatments(age, pdStage, contra, levoF, dopAgoF, dbsF):
  """Apply the labelling rules to one patient.

  Args:
    age: Patient age in years.
    pdStage: PD stage (1-5).
    contra: 1 if the patient has contraindications, else 0.
    levoF, dopAgoF, dbsF: 1 if the patient was previously treated with
      L-Dopa, a dopamine agonist, or DBS respectively, else 0.

  Returns:
    Tuple (levoL, dopAgoL, dbsL, duopaL) of 0/1 labels.
  """
  # Labels start from 0 for every patient; the "nothing recommended" fallback
  # below relies on comparing them with 0.
  levoL = dopAgoL = dbsL = duopaL = 0

  # Examples of rules
  if age <= 70 and pdStage < 3 and levoF == 1:
    levoL = 1
    dopAgoL = 1
  elif age <= 70 and pdStage < 3:
    dopAgoL = 1
  if age <= 70 and pdStage == 3 and levoF == 1:
    levoL = 1
  elif age <= 70 and pdStage == 3:
    dbsL = 1
  if age > 70 and age <= 80 and pdStage <= 3 and levoF == 1 and dopAgoF == 1:
    levoL = 1
    dopAgoL = 1
  elif age > 70 and age <= 80 and pdStage <= 3 and levoF == 1:
    levoL = 1
  elif age > 70 and age <= 80 and pdStage <= 3 and dopAgoF == 1:
    dopAgoL = 1
  if age > 80 and pdStage <= 3 and levoF == 1:
    levoL = 1
  elif age > 80 and pdStage <= 3:
    duopaL = 1
  if age > 70 and pdStage > 3 and levoF == 1:
    levoL = 1
    dbsL = 1
  elif age > 70 and age <= 80 and pdStage > 3:
    duopaL = 1
    dbsL = 1

  # Adjustments
  if dbsF == 1:
    dbsL = 0
  if pdStage <= 3:
    duopaL = 0
    dbsL = 0
  if levoL == 1 and duopaL == 1:
    levoL = 0
  # Fallback: when no rule produced a recommendation, default to L-Dopa + DA
  if levoL == 0 and dopAgoL == 0 and dbsL == 0 and duopaL == 0:
    levoL = 1
    dopAgoL = 1
  # Contraindications override everything, including the fallback
  if contra == 1:
    levoL = 0
    dopAgoL = 0
    dbsL = 0
    duopaL = 0

  return levoL, dopAgoL, dbsL, duopaL


def yes_no(flag):
  """Format a 0/1 flag as the 'Yes'/'No' text stored in the data table."""
  return 'Yes' if flag == 1 else 'No'


np.random.seed(100)

# Number of patients for whom each treatment is recommended
levoCount = 0
dopAgoCount = 0
dbsCount = 0
duopaCount = 0

all_data = []

for _ in range(N_PATIENTS):
  age, sex, pdStage, contra, levoF, dopAgoF, dbsF, duopaF = simulate_patient()
  levoL, dopAgoL, dbsL, duopaL = recommend_treatments(age, pdStage, contra, levoF, dopAgoF, dbsF)

  # Counter (labels are 0/1, so adding them counts the positives)
  levoCount += levoL
  dopAgoCount += dopAgoL
  dbsCount += dbsL
  duopaCount += duopaL

  # Data table value formatting
  indpat_data = [age, 'Male' if sex == 1 else 'Female', pdStage, yes_no(contra),
                 yes_no(levoF), yes_no(dopAgoF), yes_no(dbsF), yes_no(duopaF),
                 yes_no(levoL), yes_no(dopAgoL), yes_no(dbsL), yes_no(duopaL)]
  all_data.append(indpat_data)

# Rows mix numbers and text, so this array (and therefore df) holds every value
# as a string; numeric columns are converted back before normalization.
patient_data = np.array(all_data)

columns = ['Age (F)', 'Sex (F)', 'PD Stage (F)', 'Contraindication(s) (F)', 'L-Dopa (F)', 'DA (F)', 'DBS (F)', 'Duopa (F)', 'L-Dopa (L)', 'DA (L)', 'DBS (L)', 'Duopa (L)']

df = pd.DataFrame(data=patient_data, columns=columns)

print(df)
print(levoCount)
print(dopAgoCount)
print(dbsCount)
print(duopaCount)

In [None]:
# @title Normalizing and Encoding Data

# Work on a copy so the raw simulation table stays untouched
normalized_df = df.copy()

# Normalizing data: each numeric column is cast to float, shifted by its
# offset and divided by its span -> (value - offset) / span
scaling = {'Age (F)': (40, 59), 'PD Stage (F)': (1, 4)}
for scaled_col, (offset, span) in scaling.items():
  normalized_df[scaled_col] = (normalized_df[scaled_col].astype(float) - offset) / span

# Encoding 'Sex' column to binary ('Male' -> 1, anything else -> 0)
normalized_df['Sex (F)'] = (normalized_df['Sex (F)'] == 'Male').astype(int)

# Encoding other columns to binary ('Yes' -> 1, anything else -> 0)
binary_columns = ['Contraindication(s) (F)', 'L-Dopa (F)', 'DA (F)', 'DBS (F)', 'Duopa (F)', 'L-Dopa (L)', 'DA (L)', 'DBS (L)', 'Duopa (L)']
for col in binary_columns:
  normalized_df[col] = (normalized_df[col] == 'Yes').astype(int)

print(normalized_df)

In [None]:
# @title Splitting Dataset

# Splitting dataset: 20% of the rows are held out for testing, and
# shuffle=False keeps the rows in their existing order
train, test = train_test_split(normalized_df, shuffle=False, test_size=0.2)

# The first 8 columns are the features, the following 4 are the labels
feature_cols = slice(0, 8)
label_cols = slice(8, 12)

x_train, y_train = train.iloc[:, feature_cols], train.iloc[:, label_cols]
x_test, y_test = test.iloc[:, feature_cols], test.iloc[:, label_cols]

print(x_train)
print(y_train)

In [None]:
# @title Defining Plotting Function

def plot_curve(epochs, hist, list_of_metrics):
  """Plot one or more classification metrics against the epoch.

  Args:
    epochs: Sequence of epoch numbers, one per recorded epoch.
    hist: Object indexable by metric name (e.g. a DataFrame) where each entry
      holds one value per epoch.
    list_of_metrics: Names of the entries of `hist` to draw, one curve each.
  """
  plt.figure(figsize=(10, 6))
  plt.xlabel("Epoch")
  plt.ylabel("Value")

  # Every metric is drawn the same way; the first epoch is left out of each curve
  for m in list_of_metrics:
    plt.plot(epochs[1:], hist[m][1:], label=m)

  # Legend and axis limits do not depend on the metric, so they are applied
  # once after all curves exist (and skipped when nothing was drawn).
  if len(list_of_metrics) > 0:
    plt.legend()
    plt.ylim(0.5, 1.0)

In [None]:
# @title Defining Create Model Function

def create_model(my_learning_rate):
  """Create and compile the neural network.

  Args:
    my_learning_rate: Learning rate handed to the Adam optimizer.

  Returns:
    The compiled Sequential model: one 32-unit ReLU hidden layer fed by 8
    inputs, followed by a 4-unit sigmoid output layer.
  """
  # First hidden layer
  hidden_layer = tf.keras.layers.Dense(units=32, activation='relu', input_shape=(8,))

  # Currently disabled layers, kept for reference:
  #   Dropout regularization layer
  #     tf.keras.layers.Dropout(rate=0.2)
  #   Second hidden layer
  #     tf.keras.layers.Dense(units=32, activation='relu', input_shape=(8,))

  # Output layer, with units being 4 due to 4 possible binary outputs
  output_layer = tf.keras.layers.Dense(units=4, activation='sigmoid')

  network = tf.keras.models.Sequential([hidden_layer, output_layer])

  adam = tf.keras.optimizers.Adam(learning_rate=my_learning_rate)
  network.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy', 'binary_accuracy'])
  return network

In [None]:
# @title Defining Train Model Function

# Setting class weights: index 0 is fixed at 1.0 and indices 1-3 are the
# L-Dopa label count divided by the DA, DBS and Duopa label counts respectively
class_weights = {
  0: 1.0,
  **{position: float(levoCount) / positives
     for position, positives in enumerate((dopAgoCount, dbsCount, duopaCount), start=1)},
}

def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=0.1):
  """Train `model`, report F1 scores and plot test-set confusion matrices.

  Besides its arguments, this function reads three notebook-level names:
  `class_weights` (passed to `fit`) and `x_test` / `y_test` (the held-out set
  used for the test metrics and confusion matrices).

  Args:
    model: Compiled model exposing `fit` and `predict`.
    train_features: Training features.
    train_label: Training labels, one column per output.
    epochs: Number of epochs to train for.
    batch_size: Batch size forwarded to `fit`.
    validation_split: Fraction of the training data forwarded to `fit` as
      validation data.

  Returns:
    Tuple (epochs, hist): the list of epoch indices and a DataFrame with one
    row per epoch holding the recorded metrics plus one `f1_score_<label>`
    column per output.
  """
  # NOTE(review): class_weight is passed as a {class index: weight} mapping;
  # confirm in the Keras documentation that it has the intended effect for
  # multi-label targets like these.
  history = model.fit(x=train_features, y=train_label, batch_size=batch_size,
                      epochs=epochs, shuffle=True,
                      validation_split=validation_split, class_weight=class_weights)

  # Snapshot of the model's metrics at each epoch to track progression
  epochs = history.epoch
  hist = pd.DataFrame(history.history)

  # Setting model prediction threshold for sigmoid activation function
  y_pred_prob = model.predict(train_features)
  y_pred = (y_pred_prob > 0.5).astype(int)

  f1_scores = f1_score(train_label, y_pred, average=None)

  # Name the F1 columns after the labels that were actually passed in; fall
  # back to the notebook-level y_train only when the labels carry no columns.
  label_names = getattr(train_label, 'columns', None)
  if label_names is None:
    label_names = y_train.columns

  # Calculating f1_score for each label. The score is computed once, after
  # training, so each f1_score column holds the same value on every epoch row.
  for i, score in enumerate(f1_scores):
    hist[f'f1_score_{label_names[i]}'] = score

  # Calculating macro-average F1 score (over all of train_features, which
  # includes the rows `fit` held out for validation)
  macro_avg_f1_score = f1_score(train_label, y_pred, average='macro')

  # Printing macro-average F1 score
  print(f"Macro-average F1 Score on Training and Validation Sets: {macro_avg_f1_score}")

  # Calculating predictions on the test set with the model that was passed in
  y_pred_test_prob = model.predict(x_test)
  y_pred_test = (y_pred_test_prob > 0.5).astype(int)

  # Calculating macro-average F1-score for the test set
  macro_avg_f1_score_test = f1_score(y_test, y_pred_test, average='macro')

  # Printing macro-average F1-score for the test set
  print(f"Macro-average F1 Score on the Test Set: {macro_avg_f1_score_test}")

  # Calculating confusion matrix for each label on test set
  for i, column in enumerate(y_test.columns):
    class_labels = y_test[column].values
    class_predictions = y_pred_test[:, i]
    cm = confusion_matrix(class_labels, class_predictions)
    # Reverse both axes so that label 1 comes first, matching labels_order
    cm = cm[::-1, ::-1]
    labels_order = ['1', '0']

    # Plotting confusion matrix using sns
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', linewidths=0.5, linecolor='black', cbar=False,
                 xticklabels=labels_order, yticklabels=labels_order)
    plt.title(f'Confusion Matrix for class {column} (Test Set)')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

  return epochs, hist

In [None]:
# @title Running Model

# Hyperparameters to tune
learning_rate = 0.01
epochs = 30
batch_size = 1000
validation_split = 0.2

# Building a fresh model
my_model = create_model(learning_rate)

# Training model on normalized training set; `epochs` is rebound from the
# requested epoch count to the list of epoch indices that train_model returns
epochs, hist = train_model(my_model, x_train, y_train, epochs,
                           batch_size=batch_size,
                           validation_split=validation_split)

# Plotting metrics vs. epochs graph: both accuracy metrics plus the per-label
# F1 columns, which train_model names after the label columns
list_of_metrics_to_plot = ['accuracy', 'binary_accuracy'] + [f'f1_score_{label_name}' for label_name in y_train.columns]
plot_curve(epochs, hist, list_of_metrics_to_plot)

# Evaluating against test set
print("\n Evaluate the new model against the test set:")
my_model.evaluate(x=x_test, y=y_test, batch_size=batch_size)