In [3]:
import pandas as pd
import numpy as np
from math import log2
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# ---------- 1. Data ----------
# The 14 observations are written one per row and then transposed into the
# column-name -> list-of-values mapping that the DataFrame is built from.
_COLUMNS = ('Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis')
_ROWS = [
    ('Sunny',    'Hot',  'High',   'Weak',   'No'),
    ('Sunny',    'Hot',  'High',   'Strong', 'No'),
    ('Overcast', 'Hot',  'High',   'Weak',   'Yes'),
    ('Rain',     'Mild', 'High',   'Weak',   'Yes'),
    ('Rain',     'Cool', 'Normal', 'Weak',   'Yes'),
    ('Rain',     'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
    ('Sunny',    'Mild', 'High',   'Weak',   'No'),
    ('Sunny',    'Cool', 'Normal', 'Weak',   'Yes'),
    ('Rain',     'Mild', 'Normal', 'Weak',   'Yes'),
    ('Sunny',    'Mild', 'Normal', 'Strong', 'Yes'),
    ('Overcast', 'Mild', 'High',   'Strong', 'Yes'),
    ('Overcast', 'Hot',  'Normal', 'Weak',   'Yes'),
    ('Rain',     'Mild', 'High',   'Strong', 'No'),
]
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}
df = pd.DataFrame(data)

# ---------- 2. Convert labels to numbers ----------
# Learn the class vocabulary first, then overwrite the target column with the
# integer codes produced by the fitted encoder.
le = LabelEncoder()
le.fit(df['PlayTennis'])
df['PlayTennis'] = le.transform(df['PlayTennis'])  # No=0, Yes=1

# ---------- 3. Helper functions ----------
def entropy(vals):
    """Return the Shannon entropy (in bits) of a collection of class labels.

    Labels are counted with ``np.unique`` so any comparable label type is
    accepted (integers, negative integers, strings, ...), not only the
    non-negative integers that ``np.bincount`` supports.

    Args:
        vals: Sized sequence or array of class labels.

    Returns:
        The entropy as a float; ``0.0`` for an empty or single-class input.
    """
    total = len(vals)
    if total == 0:
        return 0.0
    _, counts = np.unique(vals, return_counts=True)
    # Every count reported by np.unique is >= 1, so log2 never sees a zero.
    probs = counts / total
    bits = -float(np.sum(probs * np.log2(probs)))
    # Adding 0.0 turns the "-0.0" produced for a pure set into a plain 0.0.
    return bits + 0.0

def gain_ratio(df, feature, target):
    """Return the gain ratio obtained by splitting ``df`` on ``feature``.

    The information gain is the entropy of ``target`` minus the size-weighted
    entropy of ``target`` within each distinct value of ``feature``. It is
    divided by the split information (the entropy of the partition sizes).

    Args:
        df: DataFrame holding both columns.
        feature: Name of the column to split on.
        target: Name of the class-label column.

    Returns:
        Information gain divided by split information, or 0 when the split
        information is zero.
    """
    n_rows = len(df)
    info_gain = entropy(df[target].values)
    split_info = 0

    for level in df[feature].unique():
        branch = df[df[feature] == level]
        weight = len(branch) / n_rows
        info_gain -= weight * entropy(branch[target].values)
        # A level that matches no rows has zero weight and adds nothing.
        if weight > 0:
            split_info -= weight * log2(weight)

    if split_info == 0:
        return 0
    return info_gain / split_info

# ---------- 4. Build a C4.5-style tree ----------
class C45Node:
    """Node of a decision tree over categorical features.

    A node with a non-None ``label`` acts as a leaf and always predicts that
    label. Otherwise it is an internal node that looks up the row's value for
    ``feature`` in ``children`` and delegates to the matching child.

    Args:
        feature: Name of the attribute tested by an internal node.
        label: Class label returned by a leaf node.
        default: Label returned by an internal node when the row's value has
            no matching child. Defaults to 1, which was previously hard-coded
            in ``predict``.
    """

    def __init__(self, feature=None, label=None, default=1):
        self.feature = feature
        self.label = label
        self.default = default
        self.children = {}  # feature value -> child C45Node

    def predict(self, row):
        """Return the predicted label for ``row``.

        Args:
            row: Mapping-like object indexable by feature name (for example a
                dict or a pandas Series).

        Returns:
            The leaf label reached by following ``row`` down the tree, or
            ``self.default`` of the first node that has no child for the
            row's value.
        """
        if self.label is not None:
            return self.label
        child = self.children.get(row[self.feature])
        if child is None:
            # Value not covered by any branch of this node.
            return self.default
        return child.predict(row)

def build_c45_tree(df, target, features):
    """Recursively grow a tree of ``C45Node`` objects from ``df``.

    Recursion stops with a leaf when all rows share one target value, or when
    no candidate features remain (the leaf then carries the most frequent
    target value). Otherwise the feature with the highest gain ratio is
    chosen, and one child is built per distinct value of that feature.

    Args:
        df: Training rows for this subtree.
        target: Name of the class-label column.
        features: Names of the columns still available for splitting.

    Returns:
        The root ``C45Node`` of the subtree.
    """
    target_col = df[target]
    if len(target_col.unique()) == 1:
        return C45Node(label=target_col.iloc[0])
    if not features:
        return C45Node(label=target_col.mode()[0])

    def score(candidate):
        return gain_ratio(df, candidate, target)

    split_on = max(features, key=score)
    # The chosen feature is not reused anywhere below this node.
    remaining = [name for name in features if name != split_on]
    node = C45Node(feature=split_on)

    for level in df[split_on].unique():
        branch = df[df[split_on] == level]
        if branch.empty:
            # No rows for this level: fall back to the majority label here.
            node.children[level] = C45Node(label=target_col.mode()[0])
            continue
        node.children[level] = build_c45_tree(branch, target, remaining)

    return node

# ---------- 5. Build the tree and predict ----------
target = 'PlayTennis'
# Every column other than the target is a candidate split feature.
features = df.columns[df.columns != target].tolist()
tree = build_c45_tree(df, target, features)

# Predictions are made for the same rows the tree was built from.
y_true = df[target].values
y_pred = list(map(tree.predict, (record for _, record in df.iterrows())))

# ---------- 6. Model evaluation ----------
# Pin the label order so the matrix is always 2x2, even when y_true and y_pred
# happen to contain a single class; otherwise the 4-way unpack below raises.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()
accuracy    = accuracy_score(y_true, y_pred)
precision   = precision_score(y_true, y_pred)
recall      = recall_score(y_true, y_pred)  # Sensitivity
# With no actual negatives the ratio is undefined; report 0.0 instead of NaN.
specificity = TN / (TN + FP) if (TN + FP) else 0.0
f1          = f1_score(y_true, y_pred)
spb         = recall + specificity - 1  # Youden's J

# ---------- 7. Display results ----------
# Collect the report lines first, then emit them with a single print call.
report = [
    "Confusion Matrix:",
    str(cm),
    "\nEvaluation Metrics:",
    f"Accuracy     = {accuracy:.2f}",
    f"Precision    = {precision:.2f}",
    f"Sensitivity  = {recall:.2f}",
    f"Specificity  = {specificity:.2f}",
    f"F1 Score     = {f1:.2f}",
    f"SPB (J-index)= {spb:.2f}",
]
print("\n".join(report))


Confusion Matrix:
[[5 0]
 [0 9]]

Evaluation Metrics:
Accuracy     = 1.00
Precision    = 1.00
Sensitivity  = 1.00
Specificity  = 1.00
F1 Score     = 1.00
SPB (J-index)= 1.00
