In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, precision_score, f1_score, recall_score, roc_auc_score, log_loss, confusion_matrix

In [2]:
data = pd.read_csv('merged_1.1.csv')

In [3]:
predictors = ['HGB_LC', 'PLATLC', 'WBC_LC', 'LYMPLC', 'POLYLC', 'CREALF', 'BUN_LF']
target = 'dominant_class'

In [4]:
filtered_data = data[predictors + [target]].dropna()

In [5]:
X = filtered_data[predictors]
y = filtered_data[target]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train, y_train)

In [8]:
y_pred = logistic_model.predict(X_test)

In [21]:
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)
logloss = log_loss(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [22]:
print("Classification Report:")
print(report)
print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"Precision: {precision:.2f}")
print(f"ROC AUC Score: {roc_auc:.2f}")
print(f"Log Loss: {logloss:.2f}")
print("Confusion Matrix:")
print(conf_matrix)

Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.98      0.77       316
           1       0.22      0.01      0.02       181

    accuracy                           0.63       497
   macro avg       0.43      0.49      0.39       497
weighted avg       0.48      0.63      0.50       497

Accuracy: 0.63
Recall: 0.01
F1 Score: 0.02
Precision: 0.22
ROC AUC Score: 0.49
Log Loss: 13.49
Confusion Matrix:
[[309   7]
 [179   2]]


In [23]:
intercept = logistic_model.intercept_[0]
coefficients = logistic_model.coef_[0]

In [24]:
equation = f"log-odds = {intercept:.4f}"
for i, coef in enumerate(coefficients):
    equation += f" + ({coef:.4f} * {predictors[i]})"

In [25]:
print("\nLogistic Regression Equation:")
print(equation)


Logistic Regression Equation:
log-odds = -1.8711 + (0.0108 * HGB_LC) + (0.0021 * PLATLC) + (0.0423 * WBC_LC) + (0.1144 * LYMPLC) + (0.1222 * POLYLC) + (-0.3033 * CREALF) + (0.0097 * BUN_LF)


In [26]:
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,