<a href="https://colab.research.google.com/github/hykim-1/Study/blob/main/winequality_white.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# Seed passed to the split and to every estimator that accepts random_state.
RANDOM_STATE = 42

# The CSV is semicolon-separated. Rows containing missing values are removed
# and the index is renumbered from zero afterwards.
df = pd.read_csv(
    "/content/drive/MyDrive/winequality-white.csv",
    sep=";",
    encoding="utf-8",
)
df = df.dropna()
df = df.reset_index(drop=True)

y = df["quality"]               # labels
X = df.drop("quality", axis=1)  # features (every remaining column)

# Hold out 20% of the rows for testing, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=RANDOM_STATE,
)

# -----------------------------
# 2) Model construction
# -----------------------------
# The tree-based models are fed the raw features.
dt = DecisionTreeClassifier(random_state=RANDOM_STATE)
rf = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE)

# Logistic regression and KNN are each wrapped in a pipeline that
# standardizes the features before the classifier sees them.
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=500, random_state=RANDOM_STATE),
)
knn = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=5),
)

# -----------------------------
# 3) Model training
# -----------------------------
for model in (dt, rf, lr, knn):
    model.fit(X_train, y_train)


# -----------------------------
# 4) Model evaluation
# -----------------------------
def _test_accuracy(estimator):
    """Return the accuracy of a fitted *estimator* on the held-out test split."""
    return accuracy_score(y_test, estimator.predict(X_test))


dt_acc, rf_acc, lr_acc, knn_acc = [
    _test_accuracy(fitted) for fitted in (dt, rf, lr, knn)
]

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")
print(f"KNN           : {knn_acc:.4f}")

# -----------------------------
# 5) Inspect the logistic regression coefficients (optional)
# -----------------------------
# make_pipeline names each step after its lowercased class name, which is
# why the fitted classifier is looked up under "logisticregression".
lr_model = lr.named_steps["logisticregression"]
print("Intercept:", lr_model.intercept_)
print("Coefficients:", lr_model.coef_)


=== Test Accuracy ===
Decision Tree : 0.5908
Random Forest : 0.6776
Logistic Reg. : 0.5490
KNN           : 0.5265
Intercept: [-2.247293   -0.34893958  2.39856544  3.10092408  1.77294462 -0.14277672
 -4.53342483]
Coefficients: [[ 0.55020534  0.44884663 -0.19789828 -0.36562297  0.17101653  0.52509939
   0.03736622  0.44522807  0.05997201 -0.3500447  -0.15756775]
 [-0.10720662  0.65149951 -0.10638101 -1.17830288  0.27477523 -0.67573883
  -0.0990405   1.01274465 -0.23683824 -0.04547606 -0.61711574]
 [-0.42292645  0.29573532  0.01385534 -0.64396042  0.19537251 -0.1111242
   0.10830328  0.69575742 -0.46952327 -0.10335691 -0.83622904]
 [-0.47283279 -0.31343466  0.05678    -0.15758986  0.19249767  0.01668918
   0.06811089  0.3757548  -0.3945216   0.06455331  0.10210288]
 [-0.02152366 -0.46553471 -0.04614464  0.87549533 -0.11141829  0.12889444
   0.04244281 -1.05738403  0.05066973  0.23933293  0.16714359]
 [-0.11652116 -0.47146864  0.03330688  1.24112916  0.08948699  0.28966614
  -0.01527144 -1