In [1]:
!pip install torch torchvision timm wandb tqdm scikit-learn seaborn matplotlib pandas
!pip install tensorflow
print(tf.__version__)





NameError: name 'tf' is not defined

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
import os

class DefectClassifier:
    def __init__(self, img_size=(224, 224), batch_size=32):  # 이미지 크기와 배치 크기 수정
        self.img_size = img_size
        self.batch_size = batch_size
        self.model = None
        self.history = None
        
    def create_model(self):
        """이진 분류를 위한 EfficientNetV2L 모델 생성"""
        base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
            include_top=False,
            weights='imagenet',
            input_shape=(*self.img_size, 3),
            pooling=None
        )
        
        # 전이학습을 위해 베이스 모델 동결
        base_model.trainable = False
        
        model = models.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.BatchNormalization(),
            layers.Dense(512, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(128, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(1, activation='sigmoid')
        ])
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
            loss='binary_crossentropy',
            metrics=['accuracy',
                    tf.keras.metrics.Precision(),
                    tf.keras.metrics.Recall(),
                    tf.keras.metrics.AUC()]
        )
        
        self.model = model
        return model
    
    def prepare_data(self, data_dir):
        """데이터 제너레이터 설정"""
        train_dir = os.path.join(data_dir, 'train')
        val_dir = os.path.join(data_dir, 'val')
        test_dir = os.path.join(data_dir, 'test')

        # 클래스별 이미지 수 확인
        total_ok = len(os.listdir(os.path.join(train_dir, 'OK')))
        total_ng = len(os.listdir(os.path.join(train_dir, 'NG')))
        print(f"Training data distribution:")
        print(f"OK: {total_ok} images")
        print(f"NG: {total_ng} images")

        # 데이터 증강 설정
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=45,
            width_shift_range=0.3,
            height_shift_range=0.3,
            shear_range=0.2,
            zoom_range=0.3,
            horizontal_flip=True,
            vertical_flip=True,
            brightness_range=[0.8, 1.2],
            fill_mode='nearest'
        )

        valid_datagen = ImageDataGenerator(
            rescale=1./255
        )

        # tensorflow 데이터셋으로 변환
        def to_tf_dataset(generator):
            return tf.data.Dataset.from_generator(
                lambda: generator,
                output_signature=(
                    tf.TensorSpec(shape=(None, *self.img_size, 3), dtype=tf.float32),
                    tf.TensorSpec(shape=(None,), dtype=tf.float32)
                )
            )

        # 데이터 제너레이터 생성
        train_generator = train_datagen.flow_from_directory(
            train_dir,
            target_size=self.img_size,
            batch_size=self.batch_size,
            class_mode='binary',
            shuffle=True,
            seed=42
        )

        valid_generator = valid_datagen.flow_from_directory(
            val_dir,
            target_size=self.img_size,
            batch_size=self.batch_size,
            class_mode='binary',
            shuffle=False,
            seed=42
        )

        test_generator = valid_datagen.flow_from_directory(
            test_dir,
            target_size=self.img_size,
            batch_size=self.batch_size,
            class_mode='binary',
            shuffle=False,
            seed=42
        )

        # tensorflow 데이터셋으로 변환
        train_ds = to_tf_dataset(train_generator)
        valid_ds = to_tf_dataset(valid_generator)
        test_ds = to_tf_dataset(test_generator)

        return train_ds, valid_ds, test_ds, total_ok, total_ng

    def train(self, train_ds, valid_ds, total_ok, total_ng, epochs=20):
        """모델 훈련"""
        # 클래스 가중치 계산
        total = total_ok + total_ng
        weight_for_0 = total / (2.0 * total_ok)
        weight_for_1 = total / (2.0 * total_ng)
        class_weight = {0: weight_for_0, 1: weight_for_1}

        print(f"\nClass weights:")
        print(f"OK (class 0): {weight_for_0:.3f}")
        print(f"NG (class 1): {weight_for_1:.3f}\n")

        callbacks = [
            tf.keras.callbacks.ModelCheckpoint(
                'best_model.keras',
                monitor='val_auc',
                mode='max',
                save_best_only=True,
                verbose=1
            ),
            tf.keras.callbacks.EarlyStopping(
                monitor='val_auc',
                mode='max',
                patience=8,
                restore_best_weights=True,
                verbose=1
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.1,
                patience=4,
                min_lr=1e-6,
                verbose=1
            )
        ]

        self.history = self.model.fit(
            train_ds,
            epochs=epochs,
            validation_data=valid_ds,
            callbacks=callbacks,
            class_weight=class_weight,
            verbose=1
        )

        return self.history
    
    def evaluate(self, test_generator):
        """모델 평가 및 다양한 지표 계산"""
        # 예측 수행
        predictions = self.model.predict(test_generator, verbose=1)
        y_pred = (predictions > 0.5).astype(int)
        y_true = test_generator.labels
        
        # 분류 보고서 출력
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred, target_names=['OK', 'NG']))
        
        # 혼동 행렬 시각화
        self.plot_confusion_matrix(y_true, y_pred)
        
        # ROC 커브 시각화
        self.plot_roc_curve(y_true, predictions)
        
        # 학습 곡선 시각화
        self.plot_learning_curves()
        
        return predictions
    
    def plot_confusion_matrix(self, y_true, y_pred):
        """혼동 행렬 시각화"""
        plt.figure(figsize=(8, 6))
        cm = confusion_matrix(y_true, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=['OK', 'NG'],
                   yticklabels=['OK', 'NG'])
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.show()
    
    def plot_roc_curve(self, y_true, y_pred_proba):
        """ROC 커브 시각화"""
        fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
        roc_auc = auc(fpr, tpr)
        
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='darkorange', lw=2,
                label=f'ROC curve (AUC = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.show()
    
    def plot_learning_curves(self):
        """학습 곡선 시각화"""
        metrics = ['loss', 'accuracy', 'auc_2']
        plt.figure(figsize=(15, 5))
        
        for i, metric in enumerate(metrics, 1):
            plt.subplot(1, 3, i)
            plt.plot(self.history.history[metric], label=f'Training {metric}')
            plt.plot(self.history.history[f'val_{metric}'], label=f'Validation {metric}')
            plt.title(f'Model {metric}')
            plt.xlabel('Epoch')
            plt.ylabel(metric)
            plt.legend()
        
        plt.tight_layout()
        plt.show()

def main():
    # 설정
    DATA_DIR = 'Original_data_for_model'
    IMG_SIZE = (224, 224)  # 이미지 크기 수정
    BATCH_SIZE = 32       # 배치 크기 수정
    EPOCHS = 20
    
    # 분류기 초기화 및 모델 생성
    classifier = DefectClassifier(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
    model = classifier.create_model()
    
    # 데이터 준비
    train_generator, valid_generator, test_generator, total_ok, total_ng = classifier.prepare_data(DATA_DIR)
    
    # 모델 훈련
    history = classifier.train(train_generator, valid_generator, total_ok, total_ng, epochs=EPOCHS)
    
    # 모델 평가
    predictions = classifier.evaluate(test_generator)

if __name__ == "__main__":
    main()

Training data distribution:
OK: 354 images
NG: 260 images
Found 612 images belonging to 2 classes.
Found 165 images belonging to 2 classes.
Found 302 images belonging to 2 classes.

Class weights:
OK (class 0): 0.867
NG (class 1): 1.181

Epoch 1/20
   2836/Unknown [1m10407s[0m 4s/step - accuracy: 0.5338 - auc_3: 0.5144 - loss: 0.8192 - precision_3: 0.5833 - recall_3: 0.6699