In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

In [8]:
file_path = './wine.csv'
df = pd.read_csv(file_path)

print("=== 결측치 현황 ===")
print(df.isnull().sum())

print("\n=== 레이블 분포 ===")
print(df['Wine'].value_counts())

# 레이블 인코딩
label_encoder = LabelEncoder()
encoded_df = df.copy()
for column in df.columns:
    encoded_df[column] = label_encoder.fit_transform(df[column])

# 특성과 레이블 분리
X = encoded_df.drop('Wine', axis=1)
y = encoded_df['Wine']

=== 결측치 현황 ===
Wine                    0
Alcohol                 0
Malic.acid              0
Ash                     0
Acl                     0
Mg                      0
Phenols                 0
Flavanoids              0
Nonflavanoid.phenols    0
Proanth                 0
Color.int               0
Hue                     0
OD                      0
Proline                 0
dtype: int64

=== 레이블 분포 ===
Wine
2    71
1    59
3    48
Name: count, dtype: int64


In [10]:
# 훈련 및 테스트 세트 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# X_train, X_test, y_train, y_test의 shape 확인
print("\n=== 데이터셋 Shape ===")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)



=== 데이터셋 Shape ===
X_train shape: (142, 13)
X_test shape: (36, 13)
y_train shape: (142,)
y_test shape: (36,)


In [12]:
#dt
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy:", dt_accuracy)

#rf
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)

#svm
svm_model = SVC(random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", svm_accuracy)

#lr
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", lr_accuracy)

# KNN
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred_knn)
print("KNN Accuracy:", knn_accuracy)

Decision Tree Accuracy: 0.9444444444444444
Random Forest Accuracy: 1.0
SVM Accuracy: 1.0
Logistic Regression Accuracy: 1.0
KNN Accuracy: 0.9444444444444444


In [14]:
# Confusion Matrix 계산 및 출력
print("\n=== Confusion Matrices ===")
print("Decision Tree:\n", confusion_matrix(y_test, y_pred_dt))
print("Random Forest:\n", confusion_matrix(y_test, y_pred_rf))
print("SVM:\n", confusion_matrix(y_test, y_pred_svm))
print("Logistic Regression:\n", confusion_matrix(y_test, y_pred_lr))
print("KNN:\n", confusion_matrix(y_test, y_pred_knn))


=== Confusion Matrices ===
Decision Tree:
 [[13  1  0]
 [ 0 14  0]
 [ 1  0  7]]
Random Forest:
 [[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
SVM:
 [[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
Logistic Regression:
 [[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
KNN:
 [[14  0  0]
 [ 1 12  1]
 [ 0  0  8]]
