In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# The file is read with header=None, so the column names are supplied here.
cols = ["sepal_length", "sepal_width", "petal_length", "petal_width", "label"]
iris_path = "/content/drive/MyDrive/2025-2 MLP/source/chap03/data/iris.data"
df = pd.read_csv(iris_path, header=None, names=cols).dropna()

# Target is the "label" column; every other column is a feature.
y = df["label"]
X = df.drop(columns=["label"])

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -----------------------------
# 2) Model setup
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr = LogisticRegression(max_iter=500)

# -----------------------------
# 3) Training
# -----------------------------
# All three estimators are fitted on the same training split, in this order.
for clf in (dt, rf, lr):
    clf.fit(X_train, y_train)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Accuracy on the held-out split, one value per estimator.
dt_acc, rf_acc, lr_acc = (
    accuracy_score(y_test, fitted.predict(X_test)) for fitted in (dt, rf, lr)
)

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

=== Test Accuracy ===
Decision Tree : 0.9333
Random Forest : 0.9000
Logistic Reg. : 0.9667


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# The first CSV column is used as the row index; rows with missing values are dropped.
df = pd.read_csv(
    "/content/drive/MyDrive/2025-2 MLP/source/breast_cancer.csv",
    index_col=0,
).dropna()

# Target is the "label" column; every other column is a feature.
y = df["label"]
X = df.drop(columns=["label"])

# Stratified 80/20 hold-out split with a fixed seed.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -----------------------------
# 2) Model setup
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
# NOTE(review): max_iter is very high, presumably so the solver converges on
# unscaled features; confirm, and consider standardizing the inputs instead.
lr = LogisticRegression(max_iter=10000)

# -----------------------------
# 3) Training
# -----------------------------
for clf in (dt, rf, lr):
    clf.fit(X_train, y_train)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Accuracy on the held-out split, computed in the order dt, rf, lr.
dt_acc, rf_acc, lr_acc = [
    accuracy_score(y_test, fitted.predict(X_test)) for fitted in (dt, rf, lr)
]

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

=== Test Accuracy ===
Decision Tree : 0.9123
Random Forest : 0.9561
Logistic Reg. : 0.9649


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# Load heart.csv and drop rows that contain missing values.
df = pd.read_csv("/content/drive/MyDrive/2025-2 MLP/source/heart.csv").dropna()

# Target is the "target" column; every other column is a feature.
X = df.drop(columns=["target"])
y = df["target"]

# Stratified 80/20 hold-out split with a fixed seed.
# The split is named X_train / X_test (capital X) like every other cell in this
# notebook. With the lowercase spelling, the kernel's X_train / X_test kept
# pointing at the previous cell's dataset after this cell had run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# -----------------------------
# 2) Model setup
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr = LogisticRegression(max_iter=10000)

# -----------------------------
# 3) Training
# -----------------------------
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Accuracy of each fitted model on the held-out split.
dt_acc = accuracy_score(y_test, dt.predict(X_test))
rf_acc = accuracy_score(y_test, rf.predict(X_test))
lr_acc = accuracy_score(y_test, lr.predict(X_test))

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

=== Test Accuracy ===
Decision Tree : 0.7049
Random Forest : 0.8197
Logistic Reg. : 0.8033


In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# Load car_evaluation.csv and drop rows that contain missing values.
car_path = "/content/drive/MyDrive/2025-2 MLP/source/chap02/data/car_evaluation.csv"
df = pd.read_csv(car_path).dropna()

# One-hot encode the listed columns; "output" (the target) is not in the list
# and is left as-is.
onehot_cols = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety']
df = pd.get_dummies(df, columns=onehot_cols)

# Target is the "output" column; every other column is a feature.
y = df["output"]
X = df.drop(columns=["output"])

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -----------------------------
# 2) Model setup
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr = LogisticRegression(max_iter=10000)

# -----------------------------
# 3) Training
# -----------------------------
for clf in (dt, rf, lr):
    clf.fit(X_train, y_train)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Predict once per model on the held-out split, then score against y_test.
dt_pred = dt.predict(X_test)
rf_pred = rf.predict(X_test)
lr_pred = lr.predict(X_test)

dt_acc = accuracy_score(y_test, dt_pred)
rf_acc = accuracy_score(y_test, rf_pred)
lr_acc = accuracy_score(y_test, lr_pred)

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

=== Test Accuracy ===
Decision Tree : 0.9740
Random Forest : 0.9798
Logistic Reg. : 0.9017


In [22]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier

# -----------------------------
# 1) Data preparation
# -----------------------------
# Load car_evaluation.csv and drop rows that contain missing values.
df_le = pd.read_csv("/content/drive/MyDrive/2025-2 MLP/source/chap02/data/car_evaluation.csv").dropna()

# Columns that are integer-encoded: "output" is the target, the rest are features.
categorical_cols = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety', 'output']
feature_cols = [col for col in categorical_cols if col != "output"]

# Features: a single OrdinalEncoder keeps one category list per column
# (feature_encoder.categories_), so the codes can be inspected or inverted later.
# Previously one LabelEncoder was refit on every column in a loop, so only the
# last column's mapping survived. dtype=int keeps integer codes.
feature_encoder = OrdinalEncoder(dtype=int)
df_le[feature_cols] = feature_encoder.fit_transform(df_le[feature_cols])

# Target: LabelEncoder is the encoder meant for y. `le` ends up fitted on
# "output", so le.classes_ / le.inverse_transform map codes back to class names.
le = LabelEncoder()
df_le["output"] = le.fit_transform(df_le["output"])

# NOTE(review): codes are presumably assigned in sorted order of the raw values,
# which may not match the real ordering of the category levels. That would be a
# plausible reason logistic regression scores far lower here than with one-hot
# encoding. Confirm the level names and consider passing explicit `categories=`.

# Split into features and target.
X_le = df_le.drop(columns=["output"])
y_le = df_le["output"]

# Stratified 80/20 hold-out split with a fixed seed.
X_train_le, X_test_le, y_train_le, y_test_le = train_test_split(
    X_le, y_le, test_size=0.2, stratify=y_le, random_state=42
)

# -----------------------------
# 2) Model setup
# -----------------------------
dt_le = DecisionTreeClassifier(random_state=42)
rf_le = RandomForestClassifier(n_estimators=200, random_state=42)
lr_le = LogisticRegression(max_iter=10000)

# -----------------------------
# 3) Training
# -----------------------------
dt_le.fit(X_train_le, y_train_le)
rf_le.fit(X_train_le, y_train_le)
lr_le.fit(X_train_le, y_train_le)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Accuracy of each fitted model on the held-out split.
dt_acc_le = accuracy_score(y_test_le, dt_le.predict(X_test_le))
rf_acc_le = accuracy_score(y_test_le, rf_le.predict(X_test_le))
lr_acc_le = accuracy_score(y_test_le, lr_le.predict(X_test_le))

print("=== Test Accuracy (Label Encoding) ===")
print(f"Decision Tree : {dt_acc_le:.4f}")
print(f"Random Forest : {rf_acc_le:.4f}")
print(f"Logistic Reg. : {lr_acc_le:.4f}")

=== Test Accuracy (Label Encoding) ===
Decision Tree : 0.9855
Random Forest : 0.9798
Logistic Reg. : 0.6908


In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# -----------------------------
# 1) Data preparation
# -----------------------------
# Load diabetes.csv and drop rows that contain missing values.
diabetes_path = "/content/drive/MyDrive/2025-2 MLP/source/diabetes.csv"
df = pd.read_csv(diabetes_path).dropna()

# Target is the "Outcome" column; every other column is a feature.
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

# Stratified 80/20 hold-out split with a fixed seed.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -----------------------------
# 2) Model setup
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr = LogisticRegression(max_iter=10000)

# -----------------------------
# 3) Training
# -----------------------------
for clf in (dt, rf, lr):
    clf.fit(X_train, y_train)

# -----------------------------
# 4) Evaluation
# -----------------------------
# Accuracy on the held-out split, computed in the order dt, rf, lr.
dt_acc, rf_acc, lr_acc = (
    accuracy_score(y_test, fitted.predict(X_test)) for fitted in (dt, rf, lr)
)

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

=== Test Accuracy ===
Decision Tree : 0.7273
Random Forest : 0.7468
Logistic Reg. : 0.7143
