### 1. 당뇨병 데이터를 가지고 머신러닝 5가지 분류를 수행. (SVM, LR, RF, DT, KNN)

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Location of the diabetes CSV on the local disk (a filesystem path, despite the name)
url = "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/diabetes.csv"
# Read the whole file into a DataFrame
data = pd.read_csv(filepath_or_buffer=url)

In [5]:
# Bare expression: the notebook renders the DataFrame as this cell's output
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [6]:
# Features: every column except the label
X = data.drop(columns=["Outcome"])
# Label: the 'Outcome' column
y = data["Outcome"]

In [9]:
# Split the dataset: 20% of the rows are held out for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Standardize the features.
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Candidate classifiers, keyed by the name printed in the report.
# The tree-based estimators are seeded so that repeated runs give the same
# scores, in line with the fixed seed already used for the train/test split.
models = {
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'KNN': KNeighborsClassifier(),
}

In [13]:
# Train each model on the training split and report its test-set metrics
for name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained onto it
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # One print call, one item per line: header, accuracy, per-class report, spacer
    print(
        f"=== {name} ===",
        f"Accuracy: {accuracy_score(y_test, y_pred):.4f}",
        classification_report(y_test, y_pred),
        "\n",
        sep="\n",
    )

=== SVM ===
Accuracy: 0.7338
              precision    recall  f1-score   support

           0       0.77      0.83      0.80        99
           1       0.65      0.56      0.60        55

    accuracy                           0.73       154
   macro avg       0.71      0.70      0.70       154
weighted avg       0.73      0.73      0.73       154



=== Logistic Regression ===
Accuracy: 0.7532
              precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154



=== Random Forest ===
Accuracy: 0.7338
              precision    recall  f1-score   support

           0       0.80      0.79      0.79        99
           1       0.62      0.64      0.63        55

    accuracy                           0.73       154
   macro av

### 2. 동일한 데이터로 딥러닝 분류를 수행하라. (dense layer만 사용)

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [10]:
# Load the data
# `url` holds a local filesystem path to the diabetes CSV
url = "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/diabetes.csv"
data = pd.read_csv(filepath_or_buffer=url)

In [11]:
# Preview the first five rows
print(data.head(5))

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [12]:
# Features: every column except the label
X = data.drop(columns=["Outcome"])
# Label: the 'Outcome' column
y = data["Outcome"]

In [13]:
# Hold out 20% of the rows for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
# Build the deep-learning classifier (Dense layers only).
# The input width is declared with an explicit Input layer; passing
# `input_shape` to the first Dense layer makes Keras emit a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),  # sigmoid output for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Train the model; 20% of the training data is set aside for validation
model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.5196 - loss: 0.6910 - val_accuracy: 0.6992 - val_loss: 0.6492
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7253 - loss: 0.6069 - val_accuracy: 0.7073 - val_loss: 0.5824
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7690 - loss: 0.5476 - val_accuracy: 0.7317 - val_loss: 0.5405
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7724 - loss: 0.4961 - val_accuracy: 0.7317 - val_loss: 0.5146
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7998 - loss: 0.4516 - val_accuracy: 0.7236 - val_loss: 0.4996
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7797 - loss: 0.4699 - val_accuracy: 0.7398 - val_loss: 0.4865
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x18dff2099d0>

In [18]:
# Evaluate the model on the held-out test split
# evaluate() yields the loss followed by the compiled metric (accuracy)
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7064 - loss: 0.5635 

Test Accuracy: 0.7338


### 3. 해당 데이터에서 Outcome을 삭제하고 BMI를 예측하는 회귀를 수행하라.

In [19]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [20]:
# Regression setup: BMI is the target.
# Both 'Outcome' and 'BMI' are removed from the feature columns.
X_reg = data.drop(columns=["Outcome", "BMI"])
y_reg = data["BMI"]

In [21]:
# Hold out 20% of the rows for testing, with a fixed seed
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Fit the scaler on the training split only, then apply it to both splits
scaler_reg = StandardScaler().fit(X_train_reg)
X_train_reg = scaler_reg.transform(X_train_reg)
X_test_reg = scaler_reg.transform(X_test_reg)

In [23]:
# Train the regression model (random forest regressor, fixed seed)
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X=X_train_reg, y=y_train_reg)

In [24]:
# Predict BMI for the test split
y_pred_reg = regressor.predict(X=X_test_reg)

In [25]:
# Report test-set error and goodness of fit, one metric per line
print(
    f"Mean Squared Error: {mean_squared_error(y_test_reg, y_pred_reg):.4f}",
    f"R^2 Score: {r2_score(y_test_reg, y_pred_reg):.4f}",
    sep="\n",
)

Mean Squared Error: 48.6195
R^2 Score: 0.3160


### 4. 3번과 동일하지만 dense layer만 사용한 신경망으로 회귀를 수행하라.

In [26]:
# Build the deep-learning regression model (Dense layers only).
# The input width is declared with an explicit Input layer; passing
# `input_shape` to the first Dense layer makes Keras emit a UserWarning.
reg_model = Sequential([
    tf.keras.Input(shape=(X_train_reg.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),  # regression output layer, no activation
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
# Adam optimizer with mean squared error as both the loss and the tracked metric
reg_model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse'],
)

In [28]:
# Train the regressor; 20% of the training data is set aside for validation
reg_model.fit(
    x=X_train_reg,
    y=y_train_reg,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 1029.8733 - mse: 1029.8733 - val_loss: 1036.1920 - val_mse: 1036.1920
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 995.6545 - mse: 995.6545 - val_loss: 967.6113 - val_mse: 967.6113
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 935.0311 - mse: 935.0311 - val_loss: 878.1620 - val_mse: 878.1620
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 796.7159 - mse: 796.7159 - val_loss: 762.4872 - val_mse: 762.4872
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 694.3179 - mse: 694.3179 - val_loss: 624.7720 - val_mse: 624.7720
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 581.9335 - mse: 581.9335 - val_loss: 471.2216 - val_mse: 471.2216
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x18d843ed0d0>

In [29]:
# Evaluate on the held-out test split
# evaluate() yields the loss followed by the compiled metric (MSE)
loss, mse = reg_model.evaluate(x=X_test_reg, y=y_test_reg)
print(f"\nTest Mean Squared Error: {mse:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 77.7371 - mse: 77.7371 

Test Mean Squared Error: 83.4076
