In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from statsmodels.stats.contingency_tables import mcnemar

# Load the raw dataset.
# NOTE(review): absolute path (looks like a Colab working directory) — confirm
# before running elsewhere.
data = pd.read_csv("/content/mc.csv")

# Integer-encode the two categorical columns. One encoder per column is kept
# so the original labels can be recovered via inverse_transform.
# NOTE(review): LabelEncoder numbers categories by sorted label, so the codes
# carry no meaningful order, yet the logistic regression below will treat them
# as numeric magnitudes — confirm this is acceptable or one-hot encode instead.
le_salary = LabelEncoder()
le_department = LabelEncoder()
data = data.assign(
    salary=le_salary.fit_transform(data["salary"]),
    department=le_department.fit_transform(data["department"]),
)

# Predictor columns fed to both models; "left" is the binary target.
feature_columns = [
    "satisfaction_level",
    "last_evaluation",
    "number_project",
    "average_montly_hours",
    "time_spend_company",
    "work_accident",
    "promotion_last_5years",
    "salary",
    "department",
    "age",
    "remote_work",
    "training_hours",
]
x = data[feature_columns]
y = data.loc[:, "left"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Decision Tree
# random_state is pinned (same seed value as the train/test split) because
# scikit-learn randomly permutes candidate features at each split: when two
# splits tie on the criterion, an unseeded tree can differ between runs, which
# would change both the accuracy and the McNemar counts computed afterwards.
dt = DecisionTreeClassifier(criterion="entropy", random_state=42)
dt.fit(x_train, y_train)
y_dt_pred = dt.predict(x_test)

# Logistic Regression
# Trained on the same split so the two models' test predictions are paired
# row-for-row. max_iter=1000 is presumably there to let the solver converge on
# the unscaled features — TODO confirm no ConvergenceWarning is raised.
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
y_lr_pred = lr.predict(x_test)

# Accuracy scores on the held-out test set
dt_accuracy = accuracy_score(y_test, y_dt_pred)
lr_accuracy = accuracy_score(y_test, y_lr_pred)
print("Decision Tree Accuracy:", dt_accuracy)
print("Logistic Regression Accuracy:", lr_accuracy)

# ---------------------------
# McNemar's Test
# ---------------------------
# Paired comparison of the two classifiers on the same test rows.
# Per-row correctness masks, aligned positionally with the predictions.
actual = y_test.to_numpy()
dt_hit = actual == y_dt_pred
lr_hit = actual == y_lr_pred

# 2x2 contingency table of outcomes:
#   rows    -> Decision Tree (correct, wrong)
#   columns -> Logistic Regression (correct, wrong)
# Only the off-diagonal (discordant) cells drive the test.
table = [
    [int((dt_hit & lr_hit).sum()), int((dt_hit & ~lr_hit).sum())],
    [int((~dt_hit & lr_hit).sum()), int((~dt_hit & ~lr_hit).sum())],
]

print("Contingency Table (McNemar):", table)

# exact=True uses the binomial distribution on the discordant pairs; in that
# mode the reported statistic is the smaller discordant count, not a chi-square.
result = mcnemar(table, exact=True)
print("McNemar’s Test Statistic:", result.statistic)
print("McNemar’s Test p-value:", result.pvalue)

# Decision at the 5% significance level.
print(
    "Significant difference between models (reject H0)"
    if result.pvalue < 0.05
    else "No significant difference between models (fail to reject H0)"
)

Decision Tree Accuracy: 0.555
Logistic Regression Accuracy: 0.695
Contingency Table (McNemar): [[87, 24], [52, 37]]
McNemar’s Test Statistic: 24.0
McNemar’s Test p-value: 0.001761872002940625
Significant difference between models (reject H0)
