In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Hold-out fraction and seed shared by every experiment, so that all three
# cases are trained and evaluated on exactly the same rows.
TEST_SIZE = 0.3
SEED = 42


def fit_and_score(features, labels, test_size=TEST_SIZE, seed=SEED):
    """Split the data, fit a logistic regression, and score it on the hold-out part.

    Parameters
    ----------
    features : array-like
        Feature matrix with one row per sample (DataFrame or ndarray).
    labels : array-like
        Class labels aligned row-by-row with ``features``.
    test_size : float
        Fraction of rows passed to ``train_test_split`` as the test set.
    seed : int
        Used as ``random_state`` for both the split and the classifier.

    Returns
    -------
    tuple
        ``(model, accuracy, (X_train, X_test, y_train, y_test))`` where
        ``model`` is the fitted ``LogisticRegression`` and ``accuracy`` is its
        accuracy on the test split.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        features, labels, test_size=test_size, random_state=seed
    )
    clf = LogisticRegression(random_state=seed, solver='lbfgs', max_iter=200)
    clf.fit(X_tr, y_tr)
    accuracy = accuracy_score(y_te, clf.predict(X_te))
    return clf, accuracy, (X_tr, X_te, y_tr, y_te)


data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# NOTE(review): the scaler (and the variance selectors below) are fit on ALL
# rows before the train/test split, so statistics of the test rows influence
# preprocessing. Kept as-is so the recorded results stay reproducible; fit on
# the training split only if an unbiased accuracy estimate is needed.
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)


# --- Case A: keep every feature ---
model_full, acc_full, (X_train_full, X_test_full, y_train, y_test) = fit_and_score(
    X_scaled, y
)

# --- Case B: drop 1 feature (threshold 0.035 -> removes sepal width) ---
selector_1 = VarianceThreshold(threshold=0.035)
X_cut1 = selector_1.fit_transform(X_scaled)
# Guard the hard-coded experiment label: exactly one column must be removed.
assert X_cut1.shape[1] == X_scaled.shape[1] - 1, "threshold 0.035 should drop exactly 1 feature"

# The label split is identical in every case (same seed, same row count), so
# re-binding y_train / y_test here does not change their values.
model_cut1, acc_cut1, (X_train_cut1, X_test_cut1, y_train, y_test) = fit_and_score(
    X_cut1, y
)

# --- Case C: drop 2 features (threshold 0.06 -> removes sepal width + length) ---
selector_2 = VarianceThreshold(threshold=0.06)
X_cut2 = selector_2.fit_transform(X_scaled)
# Guard the hard-coded experiment label: exactly two columns must be removed.
assert X_cut2.shape[1] == X_scaled.shape[1] - 2, "threshold 0.06 should drop exactly 2 features"

model_cut2, acc_cut2, (X_train_cut2, X_test_cut2, y_train, y_test) = fit_and_score(
    X_cut2, y
)

# Collect one row per experiment for the side-by-side comparison table.
results = {
    "Experiment": ["Full Features", "Cut 1 Feature (Th 0.035)", "Cut 2 Features (Th 0.06)"],
    "Features Remaining": [X_scaled.shape[1], X_cut1.shape[1], X_cut2.shape[1]],
    "Accuracy": [acc_full, acc_cut1, acc_cut2]
}

df_compare = pd.DataFrame(results)
# Show floats with four decimals (this sets the pandas display option globally).
pd.options.display.float_format = '{:.4f}'.format

print("--- ผลลัพธ์การเปรียบเทียบ (Logistic Regression) ---")
print(df_compare.to_string(index=False))

--- ผลลัพธ์การเปรียบเทียบ (Logistic Regression) ---
              Experiment  Features Remaining  Accuracy
           Full Features                   4    0.9111
Cut 1 Feature (Th 0.035)                   3    0.9556
Cut 2 Features (Th 0.06)                   2    0.9778
