In [5]:
import tensorflow as tf
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
from tensorflow.keras.utils import Sequence

In [7]:
# 1. 224px data loader (includes random sampling of 20,000 images)
class RandomSubsetGenerator(Sequence):
    """Keras ``Sequence`` that serves a fixed random subset of a .npy dataset.

    Labels are loaded once from ``y_path``. Samples are read lazily, one
    ``img_<index>.npy`` file per sample from ``x_folder``, each time a batch
    is requested.

    Args:
        x_folder: Directory containing the per-sample ``img_XXXXX.npy`` files.
        y_path: Path to the ``.npy`` file holding the labels of all samples.
        batch_size: Samples per batch; the final batch may be smaller.
        subset_size: Maximum number of samples to draw. Capped at the number
            of labels, so a small dataset is used in full.
        seed: Seed of the private random generator used for the draw. The
            default (42) selects the same subset as the earlier
            ``np.random.seed(42)`` + ``np.random.choice`` implementation.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, x_folder, y_path, batch_size=32, subset_size=20000, seed=42):
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.x_folder = x_folder
        self.y_full = np.load(y_path)
        self.batch_size = batch_size

        # Draw the subset from a private RandomState instead of calling
        # np.random.seed(): the sampled indices are the same, but the
        # process-wide RNG used by the rest of the notebook is left untouched.
        rng = np.random.RandomState(seed)
        total_indices = np.arange(len(self.y_full))
        self.indices = rng.choice(
            total_indices,
            size=min(subset_size, len(self.y_full)),
            replace=False,
        )
        self.total = len(self.indices)

        print(f"ÌïôÏäµ Îç∞Ïù¥ÌÑ∞: Ï†ÑÏ≤¥ {len(self.y_full)}Ïû• Ï§ë {self.total}Ïû• Î¨¥ÏûëÏúÑ ÏÉòÌîåÎßÅ ÏôÑÎ£å")

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(self.total / self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index`` and return ``(X, y)``.

        ``X`` is a float32 array stacked from the selected ``.npy`` files and
        ``y`` holds the matching entries of the label array.
        """
        batch_ids = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        # File names are built from the sample's position in the label array.
        X = [np.load(os.path.join(self.x_folder, f"img_{i:05d}.npy")) for i in batch_ids]
        y = self.y_full[batch_ids]
        return np.array(X, dtype=np.float32), y

In [8]:
# Validation loader (uses the entire validation set)
class ValGenerator(Sequence):
    """Keras ``Sequence`` that serves every validation sample in index order.

    Labels come from the ``.npy`` file at ``y_path``; each sample is read on
    demand from ``x_folder`` as ``img_<index>.npy``.
    """

    def __init__(self, x_folder, y_path, batch_size=32):
        self.x_folder = x_folder
        self.y = np.load(y_path)
        self.batch_size = batch_size
        # One index per label, in file order (no sampling, no shuffling here).
        self.indices = np.arange(len(self.y))

    def __len__(self):
        """Number of batches, including a final partial one."""
        batch_count = np.ceil(len(self.y) / self.batch_size)
        return int(batch_count)

    def __getitem__(self, index):
        """Return ``(X, y)`` for batch ``index``, with ``X`` as float32."""
        start = index * self.batch_size
        stop = (index + 1) * self.batch_size
        batch_ids = self.indices[start:stop]

        samples = []
        for sample_id in batch_ids:
            sample_path = os.path.join(self.x_folder, f"img_{sample_id:05d}.npy")
            samples.append(np.load(sample_path))

        labels = self.y[batch_ids]
        return np.array(samples, dtype=np.float32), labels

In [9]:
# 2. Data preparation
# Folder paths depend on the local environment (the original note says they
# follow Cell 34 and Cell 5 of the source notebook). Each value below is only
# a default: setting an environment variable of the same name overrides it,
# so the notebook can run on another machine without editing this cell.
TRAIN_DIR = os.environ.get("TRAIN_DIR", "full_npy_images")
TRAIN_LABEL = os.environ.get("TRAIN_LABEL", "y_full.npy")
VAL_DIR = os.environ.get("VAL_DIR", r"D:\workspace\full_val_npy")  # folder with the full validation set
# The label file defaults to living inside whichever VAL_DIR is in effect.
VAL_LABEL = os.environ.get("VAL_LABEL", os.path.join(VAL_DIR, "y_full.npy"))

train_gen_20k = RandomSubsetGenerator(TRAIN_DIR, TRAIN_LABEL, subset_size=20000)
val_gen_full = ValGenerator(VAL_DIR, VAL_LABEL)

ÌïôÏäµ Îç∞Ïù¥ÌÑ∞: Ï†ÑÏ≤¥ 328210Ïû• Ï§ë 20000Ïû• Î¨¥ÏûëÏúÑ ÏÉòÌîåÎßÅ ÏôÑÎ£å


In [13]:
def create_original_model(input_size=224, kernel_size=3, filters=8,
                          num_conv_layers=3, activation='relu'):
    """Build and compile the baseline CNN with a single sigmoid output.

    The network stacks ``num_conv_layers`` blocks of Conv2D -> Activation ->
    MaxPooling2D, doubling the filter count in each block, followed by
    Flatten, three Dense layers (128, 64, 32 units) and a 1-unit sigmoid
    output. The defaults reproduce the original hard-coded configuration
    (hyperparameters taken from "Cell 14" of the source notebook, with the
    input size changed from 256 to 224).

    Args:
        input_size: Height and width of the square, 3-channel input.
        kernel_size: Side length of the square convolution kernel.
        filters: Filter count of the first conv layer (8 -> 16 -> 32 by default).
        num_conv_layers: Number of convolution blocks; must be at least 1.
        activation: Activation used after every conv and hidden Dense layer.

    Returns:
        The model, compiled with Adam (default settings), binary
        cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If ``num_conv_layers`` is smaller than 1, since the first
            conv layer is the one that declares the input shape.
    """
    if num_conv_layers < 1:
        raise ValueError(f"num_conv_layers must be >= 1, got {num_conv_layers}")

    model = Sequential()

    # Convolution blocks; the filter count doubles with each block.
    for i in range(num_conv_layers):
        filter_count = filters * (2 ** i)
        # Only the first layer needs to declare the input shape.
        extra = {"input_shape": (input_size, input_size, 3)} if i == 0 else {}
        model.add(Conv2D(filter_count, (kernel_size, kernel_size), **extra))
        model.add(Activation(activation))
        model.add(MaxPooling2D())

    model.add(Flatten())

    # Hidden Dense layers (128 -> 64 -> 32), each followed by the activation.
    for units in (128, 64, 32):
        model.add(Dense(units))
        model.add(Activation(activation))

    # Output layer
    model.add(Dense(1, activation='sigmoid'))

    # Compile (Adam with default settings; the original note marks this as a guess).
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model
# Instantiate the network with the builder defined in this cell. The previous
# call targeted `create_model`, a name this notebook never defines, so it
# raised NameError on a fresh kernel.
model = create_original_model()

In [14]:
# 4. Training
EPOCHS = 5  # number of passes over the sampled training subset

print("2Îßå Ïû• Îç∞Ïù¥ÌÑ∞Î°ú ÌïôÏäµ ÏãúÏûë")
# Keep the returned History so per-epoch loss/accuracy can be inspected or
# plotted later, instead of leaving only its repr in the cell output.
history = model.fit(train_gen_20k, epochs=EPOCHS)

2Îßå Ïû• Îç∞Ïù¥ÌÑ∞Î°ú ÌïôÏäµ ÏãúÏûë
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x153ab45a070>

In [12]:
# 5. Evaluate on the full validation set
# Take the labels from the generator itself so y_true is exactly the array
# the prediction batches are indexed against, rather than a second read of
# the label file.
y_true = val_gen_full.y
# The sample count in the message is derived from the data so it cannot go stale.
print(f"üìä Ï†ÑÏ≤¥ Í≤ÄÏ¶ù Îç∞Ïù¥ÌÑ∞({len(y_true):,}Ïû•)Ïóê ÎåÄÌïú ÌèâÍ∞Ä Í≤∞Í≥º:")
y_pred_prob = model.predict(val_gen_full)
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

# Fail early if predictions and labels do not line up one-to-one; the report
# below would otherwise be meaningless.
assert len(y_pred) == len(y_true), (
    f"prediction/label count mismatch: {len(y_pred)} vs {len(y_true)}"
)

print(classification_report(y_true, y_pred, digits=4))

üìä Ï†ÑÏ≤¥ Í≤ÄÏ¶ù Îç∞Ïù¥ÌÑ∞(41,025Ïû•)Ïóê ÎåÄÌïú ÌèâÍ∞Ä Í≤∞Í≥º:
              precision    recall  f1-score   support

           0     0.7616    0.4345    0.5533     20560
           1     0.6031    0.8633    0.7101     20465

    accuracy                         0.6484     41025
   macro avg     0.6823    0.6489    0.6317     41025
weighted avg     0.6825    0.6484    0.6315     41025

