In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, roc_auc_score

# Load datasets
train_data = pd.read_csv("data/train_set_basic.csv")
test_data = pd.read_csv("data/test_set_constant.csv")

# Inspect the columns in the dataset
print("Train Data Columns:", train_data.columns)
print("Test Data Columns:", test_data.columns)

# Adjust feature columns based on the actual columns in your dataset
feature_columns = [
    'hemoglobin', 'ferritin', 'ret_count', 'segmented_neutrophils', 'tibc',
    'mcv', 'serum_iron', 'rbc', 'gender', 'creatinine', 'cholestrol',
    'copper', 'ethanol', 'folate', 'glucose', 'hematocrit', 'tsat'
]
target_column = 'label'

# Ensure all feature columns exist in the dataset
missing_columns = [col for col in feature_columns if col not in train_data.columns]
if missing_columns:
    print(f"Missing columns in train data: {missing_columns}")
else:
    # Split train and test data
    X_train = train_data[feature_columns].values
    y_train = train_data[target_column].values

    X_test = test_data[feature_columns].values
    y_test = test_data[target_column].values

    # Standardize the data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert labels to categorical one-hot encoding
    y_train_cat = tf.keras.utils.to_categorical(y_train)
    y_test_cat = tf.keras.utils.to_categorical(y_test)

    # Initialize Feedforward Neural Network
    model = Sequential()
    model.add(Dense(64, input_dim=len(feature_columns), activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train_cat, epochs=50, batch_size=32, verbose=1, validation_split=0.2)

    # Predictions
    y_pred_cat = model.predict(X_test)
    y_pred = np.argmax(y_pred_cat, axis=1)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_test_cat, y_pred_cat, multi_class='ovr')

    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("ROC AUC Score:", roc_auc)
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Train Data Columns: Index(['hemoglobin', 'ferritin', 'ret_count', 'segmented_neutrophils', 'tibc',
       'mcv', 'serum_iron', 'rbc', 'gender', 'creatinine', 'cholestrol',
       'copper', 'ethanol', 'folate', 'glucose', 'hematocrit', 'tsat',
       'label'],
      dtype='object')
Test Data Columns: Index(['hemoglobin', 'ferritin', 'ret_count', 'segmented_neutrophils', 'tibc',
       'mcv', 'serum_iron', 'rbc', 'gender', 'creatinine', 'cholestrol',
       'copper', 'ethanol', 'folate', 'glucose', 'hematocrit', 'tsat',
       'label'],
      dtype='object')
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6217 - loss: 1.0889 - val_accuracy: 0.8473 - val_loss: 0.4211
Epoch 2/50
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8579 - loss: 0.3841 - val_accuracy: 0.8873 - val_loss: 0.2886
Epoch 3/50
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8961 - loss: 0.2701 - val_accuracy: 0.9109 - val_loss: 0.2326
Epoch 4/50
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9175 - loss: 0.2114 - val_accuracy: 0.9286 - val_loss: 0.1864
Epoch 5/50
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9313 - loss: 0.1732 - val_accuracy: 0.9373 - val_loss: 0.1585
Epoch 6/50
[1m1260/1260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9420 - loss: 0.1462 - val_accuracy: 0.9402 - val_loss: 0.1454
Epoch 7/50
[1m1260/1260[0