In [1]:
import pandas as pd
import numpy as np
import pydicom
import cv2

In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # 필요한 만큼만 메모리를 사용하도록 설정
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            
        # 특정 GPU에 연산을 할당
        tf.config.set_visible_devices(gpus[0], 'GPU')
        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        
    except RuntimeError as e:
        print(e)


1 Physical GPUs, 1 Logical GPU


In [3]:
train_df = pd.read_pickle('train_data.pkl')

In [4]:
val_df= pd.read_pickle('val_data.pkl')

In [5]:
# 이미지 전처

from tensorflow.keras.utils import Sequence
import numpy as np
import pydicom
import cv2

class DicomDataGenerator(Sequence):
    def __init__(self, dataframe, x_col, y_col, batch_size, target_size, shuffle=True):
        self.dataframe = dataframe
        self.x_col = x_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        batch = self.dataframe.iloc[index*self.batch_size:(index+1)*self.batch_size]
        x, y = self.__data_generation(batch)
        return x, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.dataframe))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch):
        images = []
        labels = []

        for _, row in batch.iterrows():
            dicom_path = row[self.x_col]
            dicom = pydicom.dcmread(dicom_path)
            image = dicom.pixel_array
            image = cv2.resize(image, self.target_size)
            image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
            image = image.astype('float32') / 255.0
            image = np.expand_dims(image, axis=-1)
            images.append(image)
            labels.append(row[self.y_col])
        
        x = np.array(images)
        y = tf.keras.utils.to_categorical(labels, num_classes=3)
        return x, y

# 사용 예시
# train_df와 val_df는 DataFrame 형태로, 'img_file_path'와 'category' 컬럼이 있다고 가정
train_df['category'] = train_df['category'].astype(int)
val_df['category'] = val_df['category'].astype(int)

train_generator = DicomDataGenerator(
    dataframe=train_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=True
)

val_generator = DicomDataGenerator(
    dataframe=val_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=False
)

In [6]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# 데이터를 모두 수집하기 위해 리스트 초기화
train_images = []
train_labels = []
val_images = []
val_labels = []

# train_generator에서 데이터를 추출하여 하나의 큰 배열로 결합
for i in range(len(train_generator)):
    x_batch, y_batch = train_generator[i]
    train_images.append(x_batch)
    train_labels.append(y_batch)

# val_generator에서 데이터를 추출하여 하나의 큰 배열로 결합
for i in range(len(val_generator)):
    x_batch, y_batch = val_generator[i]
    val_images.append(x_batch)
    val_labels.append(y_batch)

In [7]:
# NumPy 배열로 변환
train_images = np.concatenate(train_images, axis=0)
train_labels = np.concatenate(train_labels, axis=0)
val_images = np.concatenate(val_images, axis=0)
val_labels = np.concatenate(val_labels, axis=0)

In [8]:
# 랜덤 포레스트 모델 초기화
rf_model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)

# 모델 학습
rf_model.fit(train_images.reshape(len(train_images), -1), np.argmax(train_labels, axis=1))

In [9]:
# 검증 데이터로 예측
val_predictions = rf_model.predict(val_images.reshape(len(val_images), -1))

# 정확도 및 기타 성능 평가
accuracy = accuracy_score(np.argmax(val_labels, axis=1), val_predictions)
print(f'Validation Accuracy: {accuracy:.4f}')
print(classification_report(np.argmax(val_labels, axis=1), val_predictions))

Validation Accuracy: 0.7598
              precision    recall  f1-score   support

           0       0.35      0.20      0.25      1589
           1       0.82      0.92      0.87      7523
           2       0.38      0.20      0.27       616

    accuracy                           0.76      9728
   macro avg       0.52      0.44      0.46      9728
weighted avg       0.71      0.76      0.73      9728



In [10]:
'''
randomForest-no generator
Validation Accuracy: 0.7598
'''

'\nrandomForest-no generator\nValidation Accuracy: 0.7598\n'