In [1]:
import sys
from pathlib import Path

# Make the local OriginalCode directory importable by putting its absolute
# path on sys.path (once).
original_code_path = Path("OriginalCode").resolve()
# sys.path holds plain strings, so the membership test has to use the string
# form. Comparing the Path object itself never matches, which would append a
# duplicate entry every time this cell is re-run.
if str(original_code_path) not in sys.path:
    sys.path.append(str(original_code_path))

In [2]:
from EnderClassifier import EnderClassifier

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import os

# Dataset location. The original machine-specific path is kept as the default
# so existing runs behave exactly as before; set the ENDOMETRIOSIS_CSV
# environment variable to run the notebook against a copy stored elsewhere.
DATA_CSV = os.environ.get(
    "ENDOMETRIOSIS_CSV",
    "/home/maciej/Desktop/Maciej/endometriosis/endometriosis.csv",
)
# Split settings gathered in one place so they are easy to find and tune.
TEST_FRACTION = 0.3
SPLIT_SEED = 42

df = pd.read_csv(DATA_CSV)
# Every column except 'target' is used as a feature; 'target' is cast to int
# and used as the class label.
X = df.drop(columns=['target'])
y = df['target'].astype('int')
# Split the data into training and testing sets. Stratifying on y keeps the
# class proportions the same in both parts; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
    stratify=y,
)

In [4]:
# Build the Ender classifier with verbose output switched off, then train it.
# The held-out split is handed to fit() alongside the training data; a later
# cell reads ender.history['accuracy_test'], so presumably fit() uses it to
# record test accuracy while training - verify against EnderClassifier.
ender = EnderClassifier(verbose=False)
ender.fit(X_train, y_train, X_test=X_test, y_test=y_test)

In [5]:
# Run the fitted classifier's evaluate_all_rules() step.
# Author's timing note: done in 1m 28s.
# NOTE(review): the progress bars captured under this cell finish in well under
# a second, so the 1m 28s figure may come from a different run or describe the
# preceding fit cell - confirm before relying on it.
ender.evaluate_all_rules()

100%|██████████| 100/100 [00:00<00:00, 519.86it/s]
100%|██████████| 100/100 [00:00<00:00, 1043.28it/s]


In [6]:
from CalculateMetrics import calculate_all_metrics

# Predict on both splits with the fitted Ender model (train first, then test).
y_train_pred, y_test_pred = ender.predict(X_train), ender.predict(X_test)

# Summarise each split with the project's metric helper.
# NOTE(review): the captured output shows f1 = 0.0 next to accuracy = 1.0 on
# the training split, which cannot both be right for a split that contains
# positives. A later cell applies np.argmax to each prediction, so predict()
# presumably returns per-class scores rather than labels - check how
# calculate_all_metrics derives f1 from them.
train_metrics, test_metrics = (
    calculate_all_metrics(truth, predicted)
    for truth, predicted in ((y_train, y_train_pred), (y_test, y_test_pred))
)

In [7]:
# Show the metric dictionaries for the training split, then the test split.
for split_name, split_metrics in (("Train", train_metrics), ("Test", test_metrics)):
    print(f"{split_name} metrics:")
    print(split_metrics)

Train metrics:
{'accuracy': 1.0, 'f1': 0.0, 'mean_absolute_error': None}
Test metrics:
{'accuracy': 0.8131313131313131, 'f1': 0.0, 'mean_absolute_error': None}


In [8]:
# Report the highest accuracy stored in the classifier's history for each split.
# NOTE(review): for the test split this is the best value anywhere in the
# history, not the score of the final model, so read it as an optimistic figure.
for split_label, history_key in (("train", "accuracy"), ("test", "accuracy_test")):
    print(f"Max {split_label} accuracy:", max(ender.history[history_key]))

Max train accuracy: 1.0
Max test accuracy: 0.8181818181818182


In [9]:
# Turn each prediction row into a class label by taking the index of its
# largest entry (presumably one score per class - verify against
# EnderClassifier.predict). A single argmax along axis 1 replaces the
# per-row Python loop and gives the same integer labels.
y_pred = np.argmax(np.asarray(y_test_pred), axis=1)

In [10]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Headline accuracy, then the per-class precision / recall / F1 table for the
# held-out split.
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}\n")
print("Classification Report - Test:")
print(classification_report(y_test, y_pred))

# Pin the class order to [0, 1]. The table below indexes cm as a 2x2 matrix
# and annotates its cells as TN/FP/FN/TP, which only holds when row/column 0
# is class 0 and row/column 1 is class 1. Without `labels`, the matrix is built
# from whichever classes happen to occur, so a split or prediction set that
# lacks one class would produce a smaller matrix and break the indexing.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels = ['No Endometriosis', 'Endometriosis']
print("Confusion Matrix:")
# Rows are the true class, columns the predicted class; every cell is
# left-aligned in a 20-character column.
print(f"{'':<20}{labels[0]:<20}{labels[1]:<20}")
print(f"{labels[0]:<20}{cm[0, 0]:<20}{cm[0, 1]:<20}  # True Negative (TN), False Positive (FP)")
print(f"{labels[1]:<20}{cm[1, 0]:<20}{cm[1, 1]:<20}  # False Negative (FN), True Positive (TP)")

Accuracy: 0.81

Classification Report - Test:
              precision    recall  f1-score   support

           0       0.85      0.92      0.88       150
           1       0.66      0.48      0.55        48

    accuracy                           0.81       198
   macro avg       0.75      0.70      0.72       198
weighted avg       0.80      0.81      0.80       198

Confusion Matrix:
                    No Endometriosis    Endometriosis       
No Endometriosis    138                 12                    # True Negative (TN), False Positive (FP)
Endometriosis       25                  23                    # False Negative (FN), True Positive (TP)


In [13]:
from xgboost import XGBClassifier

# Baseline for comparison: an XGBoost classifier with default settings,
# trained and scored on the same split as the Ender model.
# NOTE(review): this cell carries execution count 13 but sits before the cell
# numbered 12, so the saved outputs were produced out of order - re-run the
# notebook top to bottom before trusting them.
print("XGBoost:")

xgb = XGBClassifier()
xgb.fit(X_train, y_train)
# y_pred is overwritten here, so re-running the earlier Ender report cell
# after this one would score XGBoost's predictions instead.
y_pred = xgb.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}\n")
print("Classification Report - Test:")
print(classification_report(y_test, y_pred))
# Pin the class order to [0, 1]: the table below assumes a 2x2 matrix whose
# first row/column is class 0 and second is class 1. Without `labels`, a class
# missing from both y_test and y_pred would shrink the matrix and break the
# indexing.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels = ['No Endometriosis', 'Endometriosis']
print("Confusion Matrix:")
# Rows are the true class, columns the predicted class.
print(f"{'':<20}{labels[0]:<20}{labels[1]:<20}")
print(f"{labels[0]:<20}{cm[0, 0]:<20}{cm[0, 1]:<20}  # True Negative (TN), False Positive (FP)")
print(f"{labels[1]:<20}{cm[1, 0]:<20}{cm[1, 1]:<20}  # False Negative (FN), True Positive (TP)")

XGBoost:
Accuracy: 0.81

Classification Report - Test:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88       150
           1       0.65      0.50      0.56        48

    accuracy                           0.81       198
   macro avg       0.75      0.71      0.72       198
weighted avg       0.80      0.81      0.80       198

Confusion Matrix:
                    No Endometriosis    Endometriosis       
No Endometriosis    137                 13                    # True Negative (TN), False Positive (FP)
Endometriosis       24                  24                    # False Negative (FN), True Positive (TP)


In [12]:
# Ender regressor as classifier
from EnderRegressor import EnderRegressor

# Cut-off applied to the regressor's continuous output: values below it become
# class 0, everything else class 1.
DECISION_THRESHOLD = 0.5

ender_regressor = EnderRegressor()
ender_regressor.fit(X_train, y_train)

y_pred = ender_regressor.predict(X_test)
# Threshold all scores in one vectorised step instead of a Python loop.
# np.ravel flattens the output so a column-shaped result is handled the same
# way as a flat one; the `< threshold -> 0, else 1` rule is unchanged.
predictions = np.where(np.ravel(y_pred) < DECISION_THRESHOLD, 0, 1)

print("Ender Regressor as Classifier")
print(f"Accuracy: {accuracy_score(y_test, predictions):.2f}\n")
print("Classification Report - Test:")
print(classification_report(y_test, predictions))
# Pin the class order to [0, 1]: the table below assumes a 2x2 matrix whose
# first row/column is class 0 and second is class 1. Without `labels`, a class
# missing from both y_test and predictions would shrink the matrix and break
# the indexing.
cm = confusion_matrix(y_test, predictions, labels=[0, 1])
labels = ['No Endometriosis', 'Endometriosis']
print("Confusion Matrix:")
# Rows are the true class, columns the predicted class.
print(f"{'':<20}{labels[0]:<20}{labels[1]:<20}")
print(f"{labels[0]:<20}{cm[0, 0]:<20}{cm[0, 1]:<20}  # True Negative (TN), False Positive (FP)")
print(f"{labels[1]:<20}{cm[1, 0]:<20}{cm[1, 1]:<20}  # False Negative (FN), True Positive (TP)")


default_value (rule): [0.24025974025974026]
####################################################################################
Rule: 1
	sex_pain_level >= 3.5
	chronic_pain_level >= 0.5
	height >= 157.0
	age >= 26.5
	menstruation_pain >= 5.5
	smoking_time <= 23.5
	weight <= 114.5
	first_menstruation >= 6.5
	alcohol_units <= 8.05
=> Decision 0.6870129870129869

####################################################################################
Rule: 2
	defecation_pain_during <= 1.5
	chronic_pain_level <= 3.5
	family_endo <= 0.5
	menstruation_pain <= 8.5
	menstruation_lenght <= 27.0
	constipation_during >= 0.5
	stomach_ache_apart >= 0.5
	cycle <= 45.0
=> Decision -0.2225720620842569

####################################################################################
Rule: 3
	sex_bleeding <= 0.5
	sex_cramps >= 0.5
	weight <= 74.0
	sex_pain_level <= 6.5
	smoking_time >= -2.5
=> Decision 0.7597402597402596

#################################################################################