In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Load the diabetes dataset from CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine until it points at a local copy of diabetes.csv.
file_path = r"C:\Users\yale3\Downloads\diabetes.csv"
df = pd.read_csv(file_path)

# Preview the first rows and list the column names.
display(df.head())
print(df.columns)

# Separate features from the label by column name rather than by position, so
# the split stays correct even if the column order of the CSV changes.
LABEL_COLUMN = "Outcome"
X = df.drop(columns=LABEL_COLUMN)  # every column except the label
y = df[LABEL_COLUMN]               # the label column

# Hold out 20% of the rows for testing (80:20 split); the fixed seed keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')


In [8]:
# Decision Tree: train on the training split, then evaluate on the test split.
print("Decision Tree 코드")

# fit() returns the fitted estimator, so construction and training are chained.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Overall accuracy first, then the per-class precision/recall/F1 table.
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print("Decision Tree 결과: Accuracy =", dt_accuracy)
dt_report = classification_report(y_test, y_pred_dt)
print("\n", dt_report)


Decision Tree 코드
Decision Tree 결과: Accuracy = 0.7467532467532467

               precision    recall  f1-score   support

           0       0.83      0.76      0.79        99
           1       0.62      0.73      0.67        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154



In [9]:
# Logistic Regression: train with the iteration cap set to 200, then evaluate
# on the test split.
print("Logistic Regression 코드")

lr_model = LogisticRegression(max_iter=200).fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# Report accuracy followed by the per-class metrics.
lr_accuracy = accuracy_score(y_test, y_pred_lr)
print("Logistic Regression 결과: Accuracy =", lr_accuracy)
lr_report = classification_report(y_test, y_pred_lr)
print("\n", lr_report)

Logistic Regression 코드
Logistic Regression 결과: Accuracy = 0.7467532467532467

               precision    recall  f1-score   support

           0       0.81      0.79      0.80        99
           1       0.64      0.67      0.65        55

    accuracy                           0.75       154
   macro avg       0.73      0.73      0.73       154
weighted avg       0.75      0.75      0.75       154



In [16]:
# Support Vector Machine: train an SVC with its default settings, then
# evaluate on the test split.
# NOTE(review): the features are passed in unscaled here; SVC is generally
# sensitive to feature scale, so standardizing first may be worth trying.
print("SVM 코드")

svm_model = SVC().fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# Report accuracy followed by the per-class metrics.
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("SVM 결과: Accuracy =", svm_accuracy)
svm_report = classification_report(y_test, y_pred_svm)
print("\n", svm_report)

SVM 코드
SVM 결과: Accuracy = 0.7662337662337663

               precision    recall  f1-score   support

           0       0.78      0.88      0.83        99
           1       0.72      0.56      0.63        55

    accuracy                           0.77       154
   macro avg       0.75      0.72      0.73       154
weighted avg       0.76      0.77      0.76       154



In [15]:
# Random Forest: train with a fixed seed for reproducibility, then evaluate
# on the test split.
print("Random Forest 코드")

rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Report accuracy followed by the per-class metrics.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest 결과: Accuracy =", rf_accuracy)
rf_report = classification_report(y_test, y_pred_rf)
print("\n", rf_report)

Random Forest 코드
Random Forest 결과: Accuracy = 0.7207792207792207

               precision    recall  f1-score   support

           0       0.79      0.78      0.78        99
           1       0.61      0.62      0.61        55

    accuracy                           0.72       154
   macro avg       0.70      0.70      0.70       154
weighted avg       0.72      0.72      0.72       154



In [14]:
# Feature standardization: learn the scaling statistics from the training
# split only, then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build and train the KNN classifier on the standardized features.
k = 3  # number of nearest neighbours
knn_model = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)

# Predict on the standardized test split and evaluate.
y_pred_knn = knn_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred_knn)

print(f"KNN 결과: Accuracy = {accuracy:.4f}")
knn_report = classification_report(y_test, y_pred_knn)
print("\n", knn_report)

KNN 결과: Accuracy = 0.7078

               precision    recall  f1-score   support

           0       0.75      0.82      0.78        99
           1       0.61      0.51      0.55        55

    accuracy                           0.71       154
   macro avg       0.68      0.66      0.67       154
weighted avg       0.70      0.71      0.70       154

