In [1]:
import pandas as pd

# 데이터 불러오기
data_path = "C:/Users/tksmd/diabetes.csv"
df = pd.read_csv(data_path)

# 데이터 확인
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# 데이터 전처리 및 준비
X = df.drop('Outcome', axis=1).values  # 가정: 'Outcome'이 레이블
y = df['Outcome'].values

# 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# CNN 입력으로 reshape (만약 1D 데이터를 사용할 경우 CNN 적용을 위해 차원 확장)
X_train_cnn = np.expand_dims(X_train, axis=-1)  # (samples, features, 1) 형태로 만듦
X_test_cnn = np.expand_dims(X_test, axis=-1)

# CNN 모델 정의 (MaxPooling1D를 조정)
cnn_model = models.Sequential([
    layers.Conv1D(32, 2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    layers.MaxPooling1D(pool_size=2),  # 입력 크기에 맞는 적절한 풀링 사용
    layers.Conv1D(64, 2, activation='relu'),
    layers.Flatten(),  # 데이터를 1차원으로 변환
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # 이진 분류
])

# 모델 컴파일
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.6414 - loss: 0.6531 - val_accuracy: 0.6948 - val_loss: 0.5883
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7089 - loss: 0.5628 - val_accuracy: 0.7922 - val_loss: 0.5028
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7350 - loss: 0.5042 - val_accuracy: 0.8052 - val_loss: 0.4798
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7519 - loss: 0.5178 - val_accuracy: 0.8052 - val_loss: 0.4798
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7832 - loss: 0.4702 - val_accuracy: 0.7532 - val_loss: 0.4837
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7659 - loss: 0.4727 - val_accuracy: 0.7987 - val_loss: 0.4857
Epoch 7/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17d7fc7d290>

In [4]:
pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting torch>=1.3 (from pytorch-tabnet)
  Downloading torch-2.4.1-cp311-cp311-win_amd64.whl.metadata (27 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
   ---------------------------------------- 0.0/44.5 kB ? eta -:--:--
   ---------------------------------------- 44.5/44.5 kB 2.1 MB/s eta 0:00:00
Downloading torch-2.4.1-cp311-cp311-win_amd64.whl (199.4 MB)
   ---------------------------------------- 0.0/199.4 MB ? eta -:--:--
   ---------------------------------------- 0.3/199.4 MB 5.9 MB/s eta 0:00:34
   ---------------------------------------- 0.7/199.4 MB 7.4 MB/s eta 0:00:27
   ---------------------------------------- 1.2/199.4 MB 8.3 MB/s eta 0:00:24
   ---------------------------------------- 1.7/199.4 MB 8.9 MB/s eta 0:00:23
   ---------------------------------------- 2.1/199.4 MB 8.8 MB/s eta 0:00:23
   ---------------------------------------- 2.4/199.4 MB 8.6 MB/

In [3]:
from tensorflow.keras.applications import VGG16

# 데이터 차원을 3D로 변환하여 사전학습 모델에 적용할 수 있도록 준비
X_train_vgg = np.expand_dims(np.expand_dims(X_train, axis=-1), axis=-1)
X_test_vgg = np.expand_dims(np.expand_dims(X_test, axis=-1), axis=-1)

# 사전학습된 VGG16 모델 불러오기 (include_top=False로 분류기 부분 제거)
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# VGG16 모델의 가중치를 고정 (프리징)
vgg_base.trainable = False

# 새로운 모델에 VGG16의 출력을 추가하고, 사용자 정의 분류기 추가
model = models.Sequential([
    vgg_base,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # 이진 분류
])

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(X_train_vgg, y_train, epochs=10, batch_size=32, validation_data=(X_test_vgg, y_test))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step
Epoch 1/10


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "vgg16" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, 8, 1)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 8, 1, 1), dtype=float32)
  • training=True
  • mask=None

In [None]:
# VGG16의 상위 레이어를 풀어주어 파인튜닝 가능하도록 설정
for layer in vgg_base.layers[-4:]:
    layer.trainable = True

# 모델 다시 컴파일 (학습률을 낮게 설정하는 것이 중요)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습 (파인튜닝)
model.fit(X_train_vgg, y_train, epochs=10, batch_size=32, validation_data=(X_test_vgg, y_test))
