In [1]:
'''
cnn으로 특징 벡터 추출 -> xgboost 모델 학습
'''

'\ncnn으로 특징 벡터 추출 -> xgboost 모델 학습\n'

In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
import pydicom
import cv2 
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from xgboost import XGBClassifier

In [3]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # 필요한 만큼만 메모리를 사용하도록 설정
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            
        # 특정 GPU에 연산을 할당
        tf.config.set_visible_devices(gpus[0], 'GPU')
        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        
    except RuntimeError as e:
        print(e)


1 Physical GPUs, 1 Logical GPU


In [4]:
train_df = pd.read_pickle('train_data.pkl')

In [5]:
val_df= pd.read_pickle('val_data.pkl')

In [6]:
import tensorflow.keras.backend as K
def weighted_log_loss(y_true, y_pred):
    class_weights = K.constant([1.0, 2.0, 4.0])
    y_true = K.cast(y_true, y_pred.dtype)
    weights = K.sum(y_true * class_weights, axis=-1)
    loss = K.sum(y_true * K.log(y_pred + K.epsilon()), axis=-1)
    weighted_loss = -weights * loss
    return K.mean(weighted_loss)

In [7]:
from tensorflow.keras.layers import Input

# 기존 cnn모델
cnn = tf.keras.Sequential([
    Input(shape=(224, 224, 1)),  # Define input shape here
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten()
])

# CNN에서 특징 벡터 추출 모델 정의
feature_extractor = Model(inputs=cnn.inputs, outputs=cnn.layers[-1].output)

# 특징 벡터 추출
train_features = []
train_labels = []

In [8]:
# 데이터프레임에 있는 각 이미지에 대해 특징 벡터 추출
for file_path, label in zip(train_df['img_file_path'], train_df['category']):
    dicom = pydicom.dcmread(file_path)
    image = dicom.pixel_array
    image = cv2.resize(image, (224, 224))
    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    image = image.astype('float32') / 255.0
    
    image = np.expand_dims(image, axis=-1)
    feature = feature_extractor.predict(np.array([image]))
    train_features.append(feature.flatten())
    train_labels.append(label)

train_features = np.array(train_features)
train_labels = np.array(train_labels)



In [9]:
# 검증셋
val_features = []
val_labels = []

for file_path, label in zip(val_df['img_file_path'], val_df['category']):
    dicom = pydicom.dcmread(file_path)
    image = dicom.pixel_array
    image = cv2.resize(image, (224, 224))
    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    image = image.astype('float32') / 255.0
    image = np.expand_dims(image, axis=-1)
    feature = feature_extractor.predict(np.array([image]))
    val_features.append(feature.flatten())
    val_labels.append(label)

val_features = np.array(val_features)
val_labels = np.array(val_labels)



In [20]:
train_features.dtype

dtype('float32')

In [24]:
# XGBoost 모델 학습
xgb_model = XGBClassifier(max_depth=3, learning_rate=0.1, 
                          n_estimators=50, subsample=0.8, tree_method='hist', max_bin=64)
xgb_model.fit(train_features, train_labels)

# 예측 및 평가
y_pred = xgb_model.predict(val_features)
accuracy = accuracy_score(val_labels, y_pred)
print(f"Test Accuracy: {accuracy}")

XGBoostError: bad allocation

In [21]:
import xgboost as xgb
# 배치로 나누어 처리
# DMatrix 객체로 변환
dtrain = xgb.DMatrix(train_features, label=train_labels)
dtest = xgb.DMatrix(val_features, label=val_labels)

XGBoostError: bad allocation

In [19]:


# XGBoost 파라미터 설정
params = {
    'max_depth': 6,
    'eta': 0.3,
    'objective': 'multi:softprob',  # 다중 클래스 분류
    'num_class': 3  # 클래스 개수에 맞게 설정
}

# XGBoost 모델 학습 (batch_size와 학습 데이터를 나누어 사용)
num_round = 100
batch_size = 500  # 배치 크기 설정

for i in range(0, len(train_features), batch_size):
    end_index = min(i + batch_size, len(X_train))
    X_batch = train_features[i:end_index]
    y_batch = val_features[i:end_index]
    dtrain_batch = xgb.DMatrix(X_batch, label=y_batch)
    bst = xgb.train(params, dtrain_batch, num_boost_round=num_round, xgb_model=bst if 'bst' in locals() else None)

# 모델 평가
y_pred = np.argmax(bst.predict(dtest), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

XGBoostError: bad allocation

In [28]:
# 검증 데이터 예시 (val_features, val_labels)
dval = xgb.DMatrix(val_features, label=val_labels)
y_pred = booster.predict(dval)


XGBoostError: bad allocation

In [32]:
batch_size = 100  # 더 작은 배치 크기
num_batches = (len(train_features) + batch_size - 1) // batch_size  # 올림 계산

# DMatrix 객체 생성
dmatrix_list = [
    create_dmatrix(train_features[i * batch_size:(i + 1) * batch_size],
                   train_labels[i * batch_size:(i + 1) * batch_size])
    for i in range(num_batches)
]


XGBoostError: bad allocation

In [33]:
import gc
gc.collect()


9609