In [1]:
import pandas as pd
import numpy as np
import pydicom
import cv2

In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # 필요한 만큼만 메모리를 사용하도록 설정
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            
        # 특정 GPU에 연산을 할당
        tf.config.set_visible_devices(gpus[0], 'GPU')
        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        
    except RuntimeError as e:
        print(e)

1 Physical GPUs, 1 Logical GPU


In [3]:
train_df = pd.read_pickle('train_data.pkl')

In [4]:
val_df= pd.read_pickle('val_data.pkl')

In [5]:
# 이미지 전처리


from tensorflow.keras.utils import Sequence
import numpy as np
import pydicom
import cv2

class DicomDataGenerator(Sequence):
    def __init__(self, dataframe, x_col, y_col, batch_size, target_size, shuffle=True):
        self.dataframe = dataframe
        self.x_col = x_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        batch = self.dataframe.iloc[index*self.batch_size:(index+1)*self.batch_size]
        x, y = self.__data_generation(batch)
        return x, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.dataframe))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch):
        images = []
        labels = []

        for _, row in batch.iterrows():
            dicom_path = row[self.x_col]
            dicom = pydicom.dcmread(dicom_path)
            image = dicom.pixel_array
            image = cv2.resize(image, self.target_size)
            image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
            image = image.astype('float32') / 255.0
            image = np.expand_dims(image, axis=-1)
            images.append(image)
            labels.append(row[self.y_col])
        
        x = np.array(images)
        y = tf.keras.utils.to_categorical(labels, num_classes=3)
        return x, y

# 사용 예시
# train_df와 val_df는 DataFrame 형태로, 'img_file_path'와 'category' 컬럼이 있다고 가정
train_df['category'] = train_df['category'].astype(int)
val_df['category'] = val_df['category'].astype(int)

train_generator = DicomDataGenerator(
    dataframe=train_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=True
)

val_generator = DicomDataGenerator(
    dataframe=val_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=False
)

In [6]:
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# CNN 모델 정의
cnn = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten()
])

# CNN 모델 출력 특징 추출
def extract_features(generator, model):
    features = []
    labels = []
    for x_batch, y_batch in generator:
        feature = model.predict(x_batch)
        features.append(feature)
        labels.append(y_batch)
    return np.concatenate(features), np.concatenate(labels)

# 특징 추출
train_features, train_labels = extract_features(train_generator, cnn)
val_features, val_labels = extract_features(val_generator, cnn)



MemoryError: Unable to allocate 10.6 MiB for an array with shape (32, 86528) and data type float32

In [10]:
import os

def save_features_to_disk(generator, model, save_dir, prefix):
    os.makedirs(save_dir, exist_ok=True)
    for i, (x_batch, y_batch) in enumerate(generator):
        feature = model.predict(x_batch)
        feature_path = os.path.join(save_dir, f"{prefix}_features_{i}.npy")
        label_path = os.path.join(save_dir, f"{prefix}_labels_{i}.npy")
        np.save(feature_path, feature)
        np.save(label_path, y_batch)

save_features_to_disk(train_generator, cnn, 'train_features', 'train')
save_features_to_disk(val_generator, cnn, 'val_features', 'val')



In [11]:
def load_features_from_disk(save_dir, prefix):
    features = []
    labels = []
    for feature_file in sorted(os.listdir(save_dir)):
        if feature_file.startswith(prefix) and feature_file.endswith('_features.npy'):
            feature_path = os.path.join(save_dir, feature_file)
            label_path = feature_path.replace('_features.npy', '_labels.npy')
            features.append(np.load(feature_path))
            labels.append(np.load(label_path))
    return np.concatenate(features), np.concatenate(labels)

train_features, train_labels = load_features_from_disk('train_features', 'train')
val_features, val_labels = load_features_from_disk('val_features', 'val')

ValueError: need at least one array to concatenate

In [7]:
# 차원을 평탄화
train_features = train_features.reshape(train_features.shape[0], -1)
val_features = val_features.reshape(val_features.shape[0], -1)

NameError: name 'train_features' is not defined

In [8]:
# 랜덤 포레스트 모델 학습
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(train_features, np.argmax(train_labels, axis=1))

NameError: name 'train_features' is not defined

In [None]:
# 예측 및 성능 평가
val_predictions = rf_model.predict(val_features)
val_accuracy = accuracy_score(np.argmax(val_labels, axis=1), val_predictions)

print(f"Validation Accuracy: {val_accuracy}")

In [None]:
'''
cnn + rdf(no generator)

'''