### 1. 당뇨병 데이터를 가지고 머신러닝 5가지 분류를 수행. (SVM, LR, RF, DT, KNN)

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [5]:
url = "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/diabetes.csv"
data = pd.read_csv(url)

In [6]:
# 데이터 확인
print(data.head())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [7]:
# 2. 특성과 라벨 분리 (Outcome이 타겟 변수로 가정)
X = data.drop('Outcome', axis=1)  # 'Outcome' 제외한 나머지가 특성
y = data['Outcome']  # 'Outcome'이 목표 변수

In [8]:
# 3. 학습/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# 4. 데이터 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# 5. 머신러닝 모델 정의 및 학습
models = {
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'KNN': KNeighborsClassifier()
}

In [13]:
print("\n📊 머신러닝 모델 성능:")
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy: {acc:.4f}')


📊 머신러닝 모델 성능:
SVM Accuracy: 0.7338
Logistic Regression Accuracy: 0.7532
Random Forest Accuracy: 0.7403
Decision Tree Accuracy: 0.7338
KNN Accuracy: 0.6948


### 2. 동일한 데이터로 딥러닝 분류 수행하라. (dense layer 만 사용)

In [14]:
# 6. 딥러닝 모델 정의 및 학습
# 딥러닝 분류 모델 구성 (Dense Layer만 사용)
dl_model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # 이진 분류이므로 sigmoid 사용
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# 7. 모델 컴파일 및 학습
dl_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
dl_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.6593 - loss: 0.6440 - val_accuracy: 0.6992 - val_loss: 0.6275
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7015 - loss: 0.6066 - val_accuracy: 0.7317 - val_loss: 0.5945
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7133 - loss: 0.5676 - val_accuracy: 0.7480 - val_loss: 0.5674
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7582 - loss: 0.5214 - val_accuracy: 0.7317 - val_loss: 0.5427
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7721 - loss: 0.5054 - val_accuracy: 0.7317 - val_loss: 0.5222
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7655 - loss: 0.4891 - val_accuracy: 0.7398 - val_loss: 0.5092
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1fe7856d280>

In [16]:
# 8. 딥러닝 모델 평가
loss, accuracy = dl_model.evaluate(X_test, y_test)
print(f"\n🤖 딥러닝 모델 정확도: {accuracy:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7398 - loss: 0.5860 

🤖 딥러닝 모델 정확도: 0.7468


### 3. 해당 데이터에서 Outcome을 삭제하고 BMI를 예측하는 회귀를 수행하라.

In [17]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [18]:
# 3. Outcome을 삭제하고 BMI 예측을 위한 데이터 준비
# BMI는 diabetes 데이터에서 'BMI' 컬럼에 해당하는 값을 목표 변수로 설정 (해당 컬럼이 있는지 확인 필요)
X_bmi = data.drop(['Outcome', 'BMI'], axis=1)  # Outcome과 BMI를 제외한 나머지 특성 사용
y_bmi = data['BMI']  # BMI가 목표 변수

In [19]:
# 학습/테스트 데이터 분리
X_train_bmi, X_test_bmi, y_train_bmi, y_test_bmi = train_test_split(X_bmi, y_bmi, test_size=0.2, random_state=42)

In [20]:
# 4. 데이터 스케일링
scaler_bmi = StandardScaler()
X_train_bmi = scaler_bmi.fit_transform(X_train_bmi)
X_test_bmi = scaler_bmi.transform(X_test_bmi)

In [21]:
# 5. 선형 회귀 모델 학습
lr_model = LinearRegression()
lr_model.fit(X_train_bmi, y_train_bmi)

In [22]:
# 6. 예측 및 평가
y_pred_bmi = lr_model.predict(X_test_bmi)
mse_bmi = mean_squared_error(y_test_bmi, y_pred_bmi)
print(f"Linear Regression MSE (BMI Prediction): {mse_bmi:.4f}")

Linear Regression MSE (BMI Prediction): 52.2406


### 4. 3번과 동일하지만 dense layer만 사용한 신경만으로 회귀를 수행하라.

In [23]:
# 딥러닝 모델 구성 (Dense Layer만 사용)
dl_regression_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_bmi.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # 회귀 문제이므로 출력층에 활성화 함수 없음
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# 모델 컴파일 (회귀 문제에서 손실 함수는 MSE 사용)
dl_regression_model.compile(optimizer=Adam(), loss='mean_squared_error')

In [25]:
# 모델 학습
dl_regression_model.fit(X_train_bmi, y_train_bmi, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 1058.1295 - val_loss: 1036.1177
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1023.5762 - val_loss: 970.3041
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 928.4083 - val_loss: 875.9609
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 813.4957 - val_loss: 744.7643
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 669.3990 - val_loss: 579.0233
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 528.4648 - val_loss: 400.0691
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 352.8435 - val_loss: 244.5531
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 223.8973 - val_loss: 143.0914
Epoch 9/50
[1m16/16

<keras.src.callbacks.history.History at 0x1fe78aec080>

In [26]:
# 모델 평가
mse_dl_bmi = dl_regression_model.evaluate(X_test_bmi, y_test_bmi)
print(f"Deep Learning MSE (BMI Prediction): {mse_dl_bmi:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 69.7753 
Deep Learning MSE (BMI Prediction): 72.6992
