### diabates 당뇨병 분류를 CNN으로 구현

In [31]:
# 데이터 로드 및 전처리

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [32]:
# 데이터 로드
url = "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/diabetes.csv"
data = pd.read_csv(url)

In [33]:
# 데이터 확인
print(data.head())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [34]:
# 특징과 레이블 분리 (Outcome은 당뇨병 여부를 나타내는 라벨)
X = data.drop('Outcome', axis=1).values  # Features
y = data['Outcome'].values  # Label

In [35]:
# 데이터 분할 (훈련 및 테스트 데이터)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
# 데이터 정규화 (StandardScaler 사용)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [37]:
# CNN에 맞게 데이터 차원 변경 (samples, timesteps, features) - CNN에서는 (samples, features, 1)로 reshape
X_train_cnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_cnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

In [38]:
# CNN 모델 구성 및 학습 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

In [39]:
# CNN 모델 구성
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train_scaled.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dense(1, activation='sigmoid'))  # 이진 분류

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [40]:
# 모델 컴파일
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [41]:
# 모델 학습
cnn_model.fit(X_train_cnn, y_train, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6959 - loss: 0.6389
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7450 - loss: 0.5648
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7589 - loss: 0.5330
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7591 - loss: 0.5097
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7858 - loss: 0.4811
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7569 - loss: 0.4937
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7562 - loss: 0.4718
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7744 - loss: 0.4599
Epoch 9/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x2ca875e23f0>

In [42]:
# 성능 평가
loss, accuracy = cnn_model.evaluate(X_test_cnn, y_test)
print(f'CNN Classification Accuracy: {accuracy * 100:.2f}%')

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7632 - loss: 0.5061  
CNN Classification Accuracy: 75.97%


In [43]:
# 학습된 모델 저장
cnn_model.save("diabetes_cnn_model.h5")
print("CNN Model saved successfully!")



CNN Model saved successfully!


### 저장한 모델을 사전학습 모델로 불러와 층 추가 후 Fine-tunning

In [52]:
# 모델 불러오기 및 추가적인 층 구성 

from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dropout, Dense

In [53]:
# 1. 사전 학습된 모델 불러오기 (기존 모델이 컴파일된 상태)
loaded_model = load_model("diabetes_cnn_model.h5", compile=False)  # compile=False로 설정

In [54]:
# 2. 사전 학습된 모델의 가중치를 고정 (freeze)
for layer in loaded_model.layers:
    layer.trainable = False  # 기존 모델의 가중치를 고정

In [55]:
# 3. 새로운 Sequential 모델을 구성하고, 사전 학습된 모델을 추가한 후 새로운 층을 추가
fine_tuned_model = Sequential()

In [56]:
# 4. 사전 학습된 모델을 첫 번째 층으로 추가
fine_tuned_model.add(loaded_model)

In [57]:
# 5. 추가적인 층 구성 (새로운 Dense 층 추가)
fine_tuned_model.add(Dense(16, activation='relu'))
fine_tuned_model.add(Dropout(0.5))  # 과적합 방지를 위한 Dropout
fine_tuned_model.add(Dense(1, activation='sigmoid'))  # 출력층 (이진 분류)

In [58]:
# 6. 모델 컴파일 (새로 추가한 층만 학습되도록 컴파일)
fine_tuned_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [59]:
# 7. Fine-Tuning 학습
fine_tuned_model.fit(X_train_cnn, y_train, epochs=30, batch_size=32, verbose=1)

Epoch 1/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5690 - loss: 0.6906
Epoch 2/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6476 - loss: 0.6884 
Epoch 3/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7003 - loss: 0.6848
Epoch 4/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7113 - loss: 0.6760 
Epoch 5/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7148 - loss: 0.6723 
Epoch 6/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6964 - loss: 0.6742 
Epoch 7/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7001 - loss: 0.6627
Epoch 8/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7083 - loss: 0.6602
Epoch 9/30
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x2ca8e3f0080>

In [60]:
# 8. Fine-Tuned 모델 평가
loss, accuracy = fine_tuned_model.evaluate(X_test_cnn, y_test)
print(f'Fine-Tuned Model Accuracy: {accuracy * 100:.2f}%')

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7719 - loss: 0.5703  
Fine-Tuned Model Accuracy: 77.92%
