<a href="https://colab.research.google.com/github/VladimirBoshnjakovski/explainable-ai-thesis-code/blob/main/04_whitebox_RIPER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ──────────────── DATA HANDLING ────────────────
from google.colab import files
import pandas as pd

# ──────────────── VISUALIZATION ────────────────
import matplotlib.pyplot as plt   # Basic plotting
import seaborn as sns            # Advanced statistical plots

# ──────────────── MACHINE LEARNING ────────────────
import wittgenstein as lw                         # Rule-based classifiers (e.g., RIPPER)
from sklearn.model_selection import train_test_split  # Data splitting
from sklearn.metrics import (                        # Evaluation metrics
    accuracy_score,
    classification_report,
    confusion_matrix
)

Collecting wittgenstein
  Downloading wittgenstein-0.3.4-py3-none-any.whl.metadata (11 kB)
Downloading wittgenstein-0.3.4-py3-none-any.whl (110 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/110.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.6/110.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wittgenstein
Successfully installed wittgenstein-0.3.4


In [2]:
# ──────────────── LOAD DATASET FROM LOCAL FILE (Colab Upload) ────────────────
uploaded = files.upload()  # Opens file picker to upload from your computer
df = pd.read_csv(next(iter(uploaded)))  # Loads the first uploaded file into a DataFrame

Saving encoded_heart_disease_data.csv to encoded_heart_disease_data (1).csv


In [4]:
# ──────────────── FEATURE AND TARGET DEFINITION ────────────────
# Separate features (X) and target (y) from the DataFrame
X = df.drop(columns=['Presence of Heart Disease (1=Yes)'])  # All columns except target
y = df['Presence of Heart Disease (1=Yes)'].astype(int)      # Ensure target is integer-encoded

# ──────────────── TRAIN-TEST SPLIT ────────────────
# Split the data into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# ──────────────── MODEL TRAINING (RIPPER) ────────────────
# Initialize and train the RIPPER rule-based classifier
ripper = lw.RIPPER()
ripper.fit(X_train, y_train)

# ──────────────── PREDICTION AND EVALUATION ────────────────
# Predict on the test set
y_pred = ripper.predict(X_test)

# Print accuracy score
print(f"✅ RIPPER Accuracy: {accuracy_score(y_test, y_pred):.4f}\n")

# Print classification report (precision, recall, F1-score)
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))

# Print confusion matrix
print("🧮 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# ──────────────── RULE INSPECTION ────────────────
# Output the rules learned by the RIPPER algorithm
print("\n📜 Learned RIPPER Rules:")
print(ripper.ruleset_)


✅ RIPPER Accuracy: 0.7778

📊 Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.69      0.71        36
           1       0.80      0.83      0.82        54

    accuracy                           0.78        90
   macro avg       0.77      0.76      0.77        90
weighted avg       0.78      0.78      0.78        90

🧮 Confusion Matrix:
[[25 11]
 [ 9 45]]

📜 Learned RIPPER Rules:
[[ChestPain:TypicalAngina=0^TypeofThalassemia=2] V [NumberofMajorVesselsColoredbyFluoroscopy=0^Exercise-InducedAngina(1=Yes)=0^TypeofThalassemia=2] V [RestingBloodPressure(mmHg)=<110.0^SlopeofthePeakExerciseSTSegment=2] V [STDepressionInducedbyExercise=0.1-0.4] V [ChestPainType(4Categories)=3]]


In [5]:
def find_matching_rule_safe(ruleset, instance_df):
    """
    Attempts to find the first RIPPER rule that matches a single input instance.

    Parameters:
    - ruleset: Trained RIPPER ruleset (ripper.ruleset_)
    - instance_df: A single-row DataFrame representing one observation

    Returns:
    - The first matching rule object if a rule applies
    - A string message if no rule matches (i.e., default rule applies)
    """

    # Ensure instance is treated as object dtype for compatibility with rule checks
    instance_df = instance_df.astype(object)

    for rule in ruleset.rules:
        result = rule.covers(instance_df)  # Check if rule covers the instance

        if not result.empty:
            val = result.iloc[0]

            # Extract value depending on data type returned
            if isinstance(val, pd.Series):
                val = val.iloc[0]
            elif isinstance(val, (np.ndarray, list)):
                val = val[0]

            # If rule applies, return it
            if bool(val) is True:
                return rule

    # If no rule matched, return default message
    return "No rule matched (default rule used)"


In [6]:
def explain_rule_readably(rule):
    """
    Converts a RIPPER rule into a human-readable text format.

    Parameters:
    - rule: A single rule object from the RIPPER ruleset, or a string if no rule matched.

    Returns:
    - A readable string version of the rule.
    """

    # Handle default rule case
    if rule == "No rule matched (default rule used)":
        return rule

    readable_conditions = []

    for cond in rule.conds:
        feature = cond.feature
        value = cond.val

        # Custom mappings for manually one-hot encoded features
        if feature.startswith('Fluoroscopy'):
            vessels = feature.split(":")[-1]
            readable_conditions.append(f"Fluoroscopy = {vessels.replace('Vessels', ' Vessels')}")

        elif feature == 'Exercise-InducedAngina:Yes':
            readable_conditions.append("Exercise-Induced Angina = No")  # Because it's absence of "Yes"

        elif feature == 'Exercise-InducedAngina:No':
            readable_conditions.append("Exercise-Induced Angina = Yes")

        elif feature == 'Sex:Female':
            readable_conditions.append("Sex = Female")

        elif feature == 'Sex:Male':
            readable_conditions.append("Sex = Male")

        elif ":" in feature:
            # Generic one-hot encoded feature handling
            base, label = feature.split(":", 1)
            readable_conditions.append(f"{base.strip()} = {label.strip()}")

        else:
            # Fallback for continuous or other features
            readable_conditions.append(f"{feature} = {value}")

    return "IF " + " AND ".join(readable_conditions) + " THEN Heart Disease = Yes"


In [8]:
# ──────────────── FIND FIRST TEST INSTANCE WITH A MATCHED RULE ────────────────
for idx in range(len(X_test)):
    # Select a single test instance (as DataFrame)
    instance = X_test.iloc[[idx]]

    # Get the true and predicted labels
    true_label = y_test.iloc[idx]
    predicted = ripper.predict(instance)[0]

    # Identify the matching rule, if any
    matched_rule = find_matching_rule_safe(ripper.ruleset_, instance)

    # Proceed only if a specific (non-default) rule matched the instance
    if matched_rule != "No rule matched (default rule used)":
        print(f"\n✅ Found instance with non-default rule at index #{idx}")
        print(f"✅ True label: {true_label}")
        print(f"🤖 Predicted:  {predicted}")
        print("📜 Matched rule:")
        print(matched_rule)
        break  # Stop after the first matched instance is found



✅ Found instance with non-default rule at index #0
✅ True label: 1
🤖 Predicted:  True
📜 Matched rule:
[ChestPain:TypicalAngina=0^TypeofThalassemia=2]


In [9]:
# ──────────────── IDENTIFY AND EXPLAIN FIRST NON-DEFAULT RULE MATCH ────────────────
for idx in range(len(X_test)):
    # Extract the instance (as a one-row DataFrame)
    instance = X_test.iloc[[idx]]

    # Get the true label and the model's prediction
    true_label = y_test.iloc[idx]
    predicted = ripper.predict(instance)[0]

    # Find which rule (if any) covers this instance
    matched_rule = find_matching_rule_safe(ripper.ruleset_, instance)

    # If a specific rule (not the default) matched, display details
    if matched_rule != "No rule matched (default rule used)":
        print(f"\n✅ Found instance with non-default rule at index #{idx}")
        print(f"✅ True label: {true_label}")
        print(f"🤖 Predicted:  {predicted}")

        # Show raw rule object
        print("\n📜 Matched rule (raw):")
        print(matched_rule)

        # Show human-readable explanation
        print("\n📜 Matched rule (readable):")
        print(explain_rule_readably(matched_rule))
        break  # Exit after first match



✅ Found instance with non-default rule at index #0
✅ True label: 1
🤖 Predicted:  True

📜 Matched rule (raw):
[ChestPain:TypicalAngina=0^TypeofThalassemia=2]

📜 Matched rule (readable):
IF Chest Pain = Typical Angina AND Type of Thalassemia = 2 THEN Heart Disease = Yes
