In [43]:
import ast
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, f1_score
from wafer_dataset import WaferDataset, parse_failure_type_field
import os, time

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, f1_score
from torchsummary import summary
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from wafer_dataset import WaferDataset, parse_failure_type_field
# ---------------------------

In [44]:
CSV_PATH = "wb_withpattern.pkl"
df = pd.read_pickle(CSV_PATH)
df.head(1)

Unnamed: 0,index,waferMap,dieSize,lotName,trianTestLabel,failureType,waferMapDim,failureNum,trainTestNum
0,19,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,[[Training]],[[Loc]],"(45, 48)",4,0


In [45]:
IMG_SIZE = 56  # 56x56 입력 크기
BATCH_SIZE = 96
SEED = 1414

print("Loading pkl...", CSV_PATH)
df = pd.read_pickle(CSV_PATH)

train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['failureNum'],
    random_state=SEED,
)
num_classes = int(df['failureNum'].nunique())

train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

train_dataset = WaferDataset(train_df, transform=train_transform)
val_dataset = WaferDataset(val_df, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


Loading pkl... wb_withpattern.pkl


### Teacher-Student(지식 증류) 접근 방식
- 입력 크기를 56x56으로 고정하고, **깊은 Teacher CNN**과 **경량 Student CNN**을 각각 설계합니다.
- Teacher는 다중 컨볼루션 블록/Dropout을 사용해 충분한 표현력을 확보하고, Student는 채널 수를 줄여 추론 속도를 높입니다.
- Student 학습 시 `CrossEntropyLoss`와 `KLDivLoss` 기반 soft target을 혼합해 지식 증류(distillation)를 수행합니다.
- 클래스 불균형은 여전히 클래스 가중치(역빈도 방식)를 적용해 완화합니다.

In [62]:
# 재현성 및 기본 설정
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

class_counts = train_df['failureNum'].value_counts().sort_index()
class_weights = torch.tensor(
    [len(train_df) / (len(class_counts) * class_counts[c]) for c in class_counts.index],
    dtype=torch.float32,
    device=device,
)



Using device: cuda


In [63]:
class TeacherNet(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.4),
            nn.Linear(128 * (IMG_SIZE // 8) * (IMG_SIZE // 8), 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        return self.classifier(self.features(x))


class StudentNet(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 24, 3, padding=1),
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(24, 48, 3, padding=1),
            nn.BatchNorm2d(48),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(48, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(96 * (IMG_SIZE // 8) * (IMG_SIZE // 8), 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        return self.classifier(self.features(x))


def init_models():
    teacher = TeacherNet(num_classes).to(device)
    student = StudentNet(num_classes).to(device)
    return teacher, student


t_teacher = TeacherNet(num_classes).to(device)
t_student = StudentNet(num_classes).to(device)

teacher_criterion = nn.CrossEntropyLoss(weight=class_weights)
teacher_optimizer = optim.AdamW(t_teacher.parameters(), lr=3e-4, weight_decay=1e-4)
student_optimizer = optim.AdamW(t_student.parameters(), lr=5e-4, weight_decay=5e-5)
student_ce = nn.CrossEntropyLoss(weight=class_weights)



In [64]:

summary(t_teacher,(1,56,56), batch_size=96)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [96, 32, 56, 56]             320
       BatchNorm2d-2           [96, 32, 56, 56]              64
              ReLU-3           [96, 32, 56, 56]               0
            Conv2d-4           [96, 32, 56, 56]           9,248
       BatchNorm2d-5           [96, 32, 56, 56]              64
              ReLU-6           [96, 32, 56, 56]               0
         MaxPool2d-7           [96, 32, 28, 28]               0
            Conv2d-8           [96, 64, 28, 28]          18,496
       BatchNorm2d-9           [96, 64, 28, 28]             128
             ReLU-10           [96, 64, 28, 28]               0
           Conv2d-11           [96, 64, 28, 28]          36,928
      BatchNorm2d-12           [96, 64, 28, 28]             128
             ReLU-13           [96, 64, 28, 28]               0
        MaxPool2d-14           [96, 64,

In [65]:

summary(t_student,(1,56,56), batch_size=96)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [96, 24, 56, 56]             240
       BatchNorm2d-2           [96, 24, 56, 56]              48
              ReLU-3           [96, 24, 56, 56]               0
         MaxPool2d-4           [96, 24, 28, 28]               0
            Conv2d-5           [96, 48, 28, 28]          10,416
       BatchNorm2d-6           [96, 48, 28, 28]              96
              ReLU-7           [96, 48, 28, 28]               0
         MaxPool2d-8           [96, 48, 14, 14]               0
            Conv2d-9           [96, 96, 14, 14]          41,568
      BatchNorm2d-10           [96, 96, 14, 14]             192
             ReLU-11           [96, 96, 14, 14]               0
        MaxPool2d-12             [96, 96, 7, 7]               0
          Flatten-13                 [96, 4704]               0
          Dropout-14                 [9

In [66]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    running_loss, running_corrects, total = 0.0, 0, 0
    for xb, yb in tqdm(loader, desc="Train", leave=False):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * xb.size(0)
        preds = torch.argmax(logits, dim=1)
        running_corrects += (preds == yb).sum().item()
        total += xb.size(0)

    return running_loss / total, running_corrects / total


def evaluate(model, loader, criterion):
    model.eval()
    running_loss, running_corrects, total = 0.0, 0, 0
    ys_true, ys_pred = [], []

    with torch.no_grad():
        for xb, yb in tqdm(loader, desc="Eval", leave=False):
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = criterion(logits, yb)

            running_loss += loss.item() * xb.size(0)
            preds = torch.argmax(logits, dim=1)
            running_corrects += (preds == yb).sum().item()
            total += xb.size(0)

            ys_true.append(yb.cpu().numpy())
            ys_pred.append(preds.cpu().numpy())

    ys_true = np.concatenate(ys_true)
    ys_pred = np.concatenate(ys_pred)

    return (
        running_loss / total,
        running_corrects / total,
        ys_true,
        ys_pred,
    )



In [67]:
import torch.nn.functional as F

In [60]:
EPOCHS_TEACHER = 50
teacher_history = []
for epoch in range(1, EPOCHS_TEACHER + 1):
    print(f"[Teacher] Epoch {epoch}/{EPOCHS_TEACHER}")
    train_loss, train_acc = train_one_epoch(t_teacher, train_loader, teacher_optimizer, teacher_criterion)
    val_loss, val_acc, _, _ = evaluate(t_teacher, val_loader, teacher_criterion)
    teacher_history.append({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
    })
    print(
        f"  train_loss={train_loss:.4f} | train_acc={train_acc:.4f} | "
        f"val_loss={val_loss:.4f} | val_acc={val_acc:.4f}"
    )


def distillation_loss(student_logits, teacher_logits, labels, temperature=4.0, alpha=0.7):
    hard_loss = student_ce(student_logits, labels)
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=1),
        F.softmax(teacher_logits / temperature, dim=1),
        reduction='batchmean'
    ) * (temperature ** 2)
    return alpha * soft_loss + (1 - alpha) * hard_loss


def distill_student(student, teacher, loader, optimizer, temperature=4.0, alpha=0.7):
    student.train()
    teacher.eval()
    running_loss, running_corrects, total = 0.0, 0, 0
    for xb, yb in tqdm(loader, desc="Distill", leave=False):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        with torch.no_grad():
            teacher_logits = teacher(xb)
        student_logits = student(xb)
        loss = distillation_loss(student_logits, teacher_logits, yb, temperature, alpha)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * xb.size(0)
        preds = torch.argmax(student_logits, dim=1)
        running_corrects += (preds == yb).sum().item()
        total += xb.size(0)

    return running_loss / total, running_corrects / total


EPOCHS_STUDENT = 35
student_history = []
for epoch in range(1, EPOCHS_STUDENT + 1):
    print(f"[Student] Epoch {epoch}/{EPOCHS_STUDENT}")
    train_loss, train_acc = distill_student(t_student, t_teacher, train_loader, student_optimizer, temperature=4.0, alpha=0.7)
    val_loss, val_acc, _, _ = evaluate(t_student, val_loader, student_ce)
    student_history.append({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
    })
    print(
        f"  train_loss={train_loss:.4f} | train_acc={train_acc:.4f} | "
        f"val_loss={val_loss:.4f} | val_acc={val_acc:.4f}"
    )



[Teacher] Epoch 1/50


Train:   0%|          | 0/213 [00:00<?, ?it/s]

                                                        

  train_loss=0.8619 | train_acc=0.6808 | val_loss=0.6059 | val_acc=0.7937
[Teacher] Epoch 2/50


                                                        

  train_loss=0.5580 | train_acc=0.7839 | val_loss=0.6282 | val_acc=0.7800
[Teacher] Epoch 3/50


                                                        

  train_loss=0.5288 | train_acc=0.8085 | val_loss=0.6076 | val_acc=0.7716
[Teacher] Epoch 4/50


                                                        

  train_loss=0.4541 | train_acc=0.8297 | val_loss=0.5303 | val_acc=0.7710
[Teacher] Epoch 5/50


                                                        

  train_loss=0.4120 | train_acc=0.8500 | val_loss=0.4628 | val_acc=0.8719
[Teacher] Epoch 6/50


                                                        

  train_loss=0.3453 | train_acc=0.8701 | val_loss=0.3953 | val_acc=0.8237
[Teacher] Epoch 7/50


                                                        

  train_loss=0.3363 | train_acc=0.8753 | val_loss=0.6117 | val_acc=0.7443
[Teacher] Epoch 8/50


                                                        

  train_loss=0.2998 | train_acc=0.8860 | val_loss=0.3413 | val_acc=0.8691
[Teacher] Epoch 9/50


                                                        

  train_loss=0.2962 | train_acc=0.8923 | val_loss=0.3554 | val_acc=0.8425
[Teacher] Epoch 10/50


                                                        

  train_loss=0.2729 | train_acc=0.8981 | val_loss=0.6264 | val_acc=0.8468
[Teacher] Epoch 11/50


                                                        

  train_loss=0.2622 | train_acc=0.9048 | val_loss=0.4084 | val_acc=0.8666
[Teacher] Epoch 12/50


                                                        

  train_loss=0.2449 | train_acc=0.9058 | val_loss=0.2967 | val_acc=0.9138
[Teacher] Epoch 13/50


                                                        

  train_loss=0.2548 | train_acc=0.9078 | val_loss=0.3057 | val_acc=0.8954
[Teacher] Epoch 14/50


                                                        

  train_loss=0.2547 | train_acc=0.9051 | val_loss=0.9067 | val_acc=0.6444
[Teacher] Epoch 15/50


                                                        

  train_loss=0.2365 | train_acc=0.9125 | val_loss=0.3606 | val_acc=0.8856
[Teacher] Epoch 16/50


                                                        

  train_loss=0.2271 | train_acc=0.9160 | val_loss=0.3733 | val_acc=0.8548
[Teacher] Epoch 17/50


                                                        

  train_loss=0.2174 | train_acc=0.9173 | val_loss=1.2600 | val_acc=0.6900
[Teacher] Epoch 18/50


                                                        

  train_loss=0.2064 | train_acc=0.9203 | val_loss=0.3265 | val_acc=0.8895
[Teacher] Epoch 19/50


                                                        

  train_loss=0.2053 | train_acc=0.9192 | val_loss=1.3891 | val_acc=0.7239
[Teacher] Epoch 20/50


                                                        

  train_loss=0.2078 | train_acc=0.9217 | val_loss=1.9333 | val_acc=0.5984
[Teacher] Epoch 21/50


                                                        

  train_loss=0.2126 | train_acc=0.9194 | val_loss=0.3433 | val_acc=0.8676
[Teacher] Epoch 22/50


                                                        

  train_loss=0.2144 | train_acc=0.9168 | val_loss=0.4300 | val_acc=0.8252
[Teacher] Epoch 23/50


                                                        

  train_loss=0.2012 | train_acc=0.9267 | val_loss=0.2334 | val_acc=0.9246
[Teacher] Epoch 24/50


                                                        

  train_loss=0.2020 | train_acc=0.9275 | val_loss=0.3599 | val_acc=0.8482
[Teacher] Epoch 25/50


                                                        

  train_loss=0.1920 | train_acc=0.9282 | val_loss=0.9431 | val_acc=0.7790
[Teacher] Epoch 26/50


                                                        

  train_loss=0.1828 | train_acc=0.9297 | val_loss=0.2486 | val_acc=0.9250
[Teacher] Epoch 27/50


                                                        

  train_loss=0.1723 | train_acc=0.9303 | val_loss=0.3108 | val_acc=0.8607
[Teacher] Epoch 28/50


                                                        

  train_loss=0.1797 | train_acc=0.9306 | val_loss=0.6767 | val_acc=0.7862
[Teacher] Epoch 29/50


                                                        

  train_loss=0.1768 | train_acc=0.9328 | val_loss=0.8795 | val_acc=0.7806
[Teacher] Epoch 30/50


                                                        

  train_loss=0.1841 | train_acc=0.9304 | val_loss=0.5609 | val_acc=0.8815
[Teacher] Epoch 31/50


                                                        

  train_loss=0.1678 | train_acc=0.9345 | val_loss=0.7453 | val_acc=0.8043
[Teacher] Epoch 32/50


                                                        

  train_loss=0.1720 | train_acc=0.9338 | val_loss=0.3067 | val_acc=0.9024
[Teacher] Epoch 33/50


                                                        

  train_loss=0.1652 | train_acc=0.9359 | val_loss=1.1126 | val_acc=0.7655
[Teacher] Epoch 34/50


                                                        

  train_loss=0.1461 | train_acc=0.9410 | val_loss=0.2406 | val_acc=0.9193
[Teacher] Epoch 35/50


                                                        

  train_loss=0.1557 | train_acc=0.9394 | val_loss=0.4077 | val_acc=0.8640
[Teacher] Epoch 36/50


                                                        

  train_loss=0.1608 | train_acc=0.9398 | val_loss=0.2544 | val_acc=0.9060
[Teacher] Epoch 37/50


                                                        

  train_loss=0.1458 | train_acc=0.9413 | val_loss=0.3263 | val_acc=0.9099
[Teacher] Epoch 38/50


                                                        

  train_loss=0.1458 | train_acc=0.9420 | val_loss=0.7356 | val_acc=0.7083
[Teacher] Epoch 39/50


                                                        

  train_loss=0.1447 | train_acc=0.9416 | val_loss=0.3987 | val_acc=0.8058
[Teacher] Epoch 40/50


                                                        

  train_loss=0.1508 | train_acc=0.9398 | val_loss=1.9079 | val_acc=0.6062
[Teacher] Epoch 41/50


                                                        

  train_loss=0.1387 | train_acc=0.9434 | val_loss=0.6181 | val_acc=0.7371
[Teacher] Epoch 42/50


                                                        

  train_loss=0.1494 | train_acc=0.9431 | val_loss=0.4635 | val_acc=0.8870
[Teacher] Epoch 43/50


                                                        

  train_loss=0.1383 | train_acc=0.9431 | val_loss=0.3130 | val_acc=0.9152
[Teacher] Epoch 44/50


                                                        

  train_loss=0.1341 | train_acc=0.9466 | val_loss=0.2490 | val_acc=0.9105
[Teacher] Epoch 45/50


                                                        

  train_loss=0.1416 | train_acc=0.9438 | val_loss=2.0094 | val_acc=0.7543
[Teacher] Epoch 46/50


                                                        

  train_loss=0.1402 | train_acc=0.9443 | val_loss=0.2353 | val_acc=0.9310
[Teacher] Epoch 47/50


                                                        

  train_loss=0.1181 | train_acc=0.9503 | val_loss=0.6470 | val_acc=0.8662
[Teacher] Epoch 48/50


                                                        

  train_loss=0.1346 | train_acc=0.9469 | val_loss=0.3376 | val_acc=0.8617
[Teacher] Epoch 49/50


                                                        

  train_loss=0.1175 | train_acc=0.9505 | val_loss=0.2585 | val_acc=0.9154
[Teacher] Epoch 50/50


                                                        

  train_loss=0.1320 | train_acc=0.9478 | val_loss=0.6043 | val_acc=0.8260
[Student] Epoch 1/35


                                                          

  train_loss=2.3759 | train_acc=0.7118 | val_loss=2.5935 | val_acc=0.5278
[Student] Epoch 2/35


                                                          

  train_loss=1.0936 | train_acc=0.7784 | val_loss=1.9633 | val_acc=0.5893
[Student] Epoch 3/35


                                                          

  train_loss=0.9113 | train_acc=0.7959 | val_loss=2.4875 | val_acc=0.2451
[Student] Epoch 4/35


                                                          

  train_loss=0.7759 | train_acc=0.8021 | val_loss=5.7194 | val_acc=0.4257
[Student] Epoch 5/35


                                                          

  train_loss=0.7134 | train_acc=0.8094 | val_loss=2.0553 | val_acc=0.5378
[Student] Epoch 6/35


                                                          

  train_loss=0.6233 | train_acc=0.8142 | val_loss=3.0451 | val_acc=0.5284
[Student] Epoch 7/35


                                                          

  train_loss=0.6002 | train_acc=0.8202 | val_loss=2.4594 | val_acc=0.6013
[Student] Epoch 8/35


                                                          

  train_loss=0.5418 | train_acc=0.8232 | val_loss=1.2525 | val_acc=0.7128
[Student] Epoch 9/35


                                                          

  train_loss=0.5101 | train_acc=0.8278 | val_loss=2.5639 | val_acc=0.7371
[Student] Epoch 10/35


                                                          

  train_loss=0.5158 | train_acc=0.8240 | val_loss=1.3351 | val_acc=0.7149
[Student] Epoch 11/35


                                                          

  train_loss=0.4670 | train_acc=0.8320 | val_loss=1.3934 | val_acc=0.6311
[Student] Epoch 12/35


                                                          

  train_loss=0.4470 | train_acc=0.8332 | val_loss=2.6060 | val_acc=0.5174
[Student] Epoch 13/35


                                                          

  train_loss=0.4283 | train_acc=0.8347 | val_loss=1.8594 | val_acc=0.5960
[Student] Epoch 14/35


                                                          

  train_loss=0.4377 | train_acc=0.8342 | val_loss=3.8525 | val_acc=0.4722
[Student] Epoch 15/35


                                                          

  train_loss=0.4116 | train_acc=0.8395 | val_loss=0.5579 | val_acc=0.8056
[Student] Epoch 16/35


                                                          

  train_loss=0.4124 | train_acc=0.8397 | val_loss=2.9402 | val_acc=0.5047
[Student] Epoch 17/35


                                                          

  train_loss=0.3914 | train_acc=0.8421 | val_loss=0.8620 | val_acc=0.8092
[Student] Epoch 18/35


                                                          

  train_loss=0.3829 | train_acc=0.8428 | val_loss=1.1556 | val_acc=0.6503
[Student] Epoch 19/35


                                                          

  train_loss=0.3773 | train_acc=0.8430 | val_loss=2.3449 | val_acc=0.7112
[Student] Epoch 20/35


                                                          

  train_loss=0.3683 | train_acc=0.8449 | val_loss=1.6365 | val_acc=0.6007
[Student] Epoch 21/35


                                                          

  train_loss=0.3662 | train_acc=0.8417 | val_loss=3.5410 | val_acc=0.4201
[Student] Epoch 22/35


                                                          

  train_loss=0.3554 | train_acc=0.8417 | val_loss=1.7492 | val_acc=0.6691
[Student] Epoch 23/35


                                                          

  train_loss=0.3549 | train_acc=0.8433 | val_loss=1.5883 | val_acc=0.6644
[Student] Epoch 24/35


                                                          

  train_loss=0.3468 | train_acc=0.8473 | val_loss=0.8312 | val_acc=0.7806
[Student] Epoch 25/35


                                                          

  train_loss=0.3428 | train_acc=0.8486 | val_loss=2.5331 | val_acc=0.4984
[Student] Epoch 26/35


                                                          

  train_loss=0.3405 | train_acc=0.8467 | val_loss=0.7295 | val_acc=0.6669
[Student] Epoch 27/35


                                                          

  train_loss=0.3299 | train_acc=0.8485 | val_loss=4.9331 | val_acc=0.3578
[Student] Epoch 28/35


                                                          

  train_loss=0.3286 | train_acc=0.8506 | val_loss=3.4213 | val_acc=0.3303
[Student] Epoch 29/35


                                                          

  train_loss=0.3338 | train_acc=0.8484 | val_loss=7.8956 | val_acc=0.1456
[Student] Epoch 30/35


                                                          

  train_loss=0.3192 | train_acc=0.8487 | val_loss=2.6142 | val_acc=0.5458
[Student] Epoch 31/35


                                                          

  train_loss=0.3210 | train_acc=0.8487 | val_loss=1.4038 | val_acc=0.6675
[Student] Epoch 32/35


                                                          

  train_loss=0.3157 | train_acc=0.8484 | val_loss=3.0389 | val_acc=0.4992
[Student] Epoch 33/35


                                                          

  train_loss=0.3113 | train_acc=0.8498 | val_loss=4.3425 | val_acc=0.4722
[Student] Epoch 34/35


                                                          

  train_loss=0.3047 | train_acc=0.8518 | val_loss=2.7799 | val_acc=0.5345
[Student] Epoch 35/35


                                                          

  train_loss=0.3033 | train_acc=0.8514 | val_loss=2.1321 | val_acc=0.5864




In [61]:
teacher_val_loss, teacher_val_acc, teacher_y_true, teacher_y_pred = evaluate(t_teacher, val_loader, teacher_criterion)
print(f"[Teacher] val_loss={teacher_val_loss:.4f}, val_acc={teacher_val_acc:.4f}")
print(classification_report(teacher_y_true, teacher_y_pred, digits=4))

student_val_loss, student_val_acc, student_y_true, student_y_pred = evaluate(t_student, val_loader, student_ce)
print(f"[Student] val_loss={student_val_loss:.4f}, val_acc={student_val_acc:.4f}")
print(classification_report(student_y_true, student_y_pred, digits=4))



                                                     

[Teacher] val_loss=0.6043, val_acc=0.8260
              precision    recall  f1-score   support

           0     0.9748    0.7648    0.8571       859
           1     0.8403    0.9009    0.8696       111
           2     0.8970    0.7890    0.8396      1038
           3     0.9874    0.9277    0.9566      1936
           4     0.8312    0.6439    0.7257       719
           5     0.2937    0.9133    0.4444       173
           6     0.4488    0.8109    0.5778       238
           7     0.5556    1.0000    0.7143        30

    accuracy                         0.8260      5104
   macro avg     0.7286    0.8438    0.7481      5104
weighted avg     0.8905    0.8260    0.8452      5104



                                                     

[Student] val_loss=2.1321, val_acc=0.5864
              precision    recall  f1-score   support

           0     1.0000    0.1758    0.2990       859
           1     0.8545    0.8468    0.8507       111
           2     0.7460    0.4952    0.5953      1038
           3     1.0000    0.7753    0.8734      1936
           4     0.4294    0.7232    0.5389       719
           5     0.1327    0.9942    0.2342       173
           6     0.1797    0.0966    0.1257       238
           7     1.0000    0.6000    0.7500        30

    accuracy                         0.5864      5104
   macro avg     0.6678    0.5884    0.5334      5104
weighted avg     0.7972    0.5864    0.6153      5104





입력 및 전처리
wb_withpattern.pkl에서 웨이퍼 맵을 읽고 failureNum 기준 stratified split(80/20)으로 train_df, val_df 구성.
모든 이미지를 56x56, 단일 채널로 변환한 뒤 RandomHorizontalFlip, RandomVerticalFlip으로 간단한 증강을 적용하여 데이터 다양성 확보.
배치 크기 96, torch.utils.data.DataLoader로 로딩.


학습 구조
TeacherNet
입력 채널 1, Conv-BN-ReLU 쌍을 3개 블록(32→64→128ch)으로 구성, 각 블록 끝에 MaxPool2d로 다운샘플.
Flatten → Dropout(0.4) → Linear(256) → Dropout(0.4) → Linear(num_classes)로 분류 헤드 구성.

StudentNet
더 얕고 좁은 구조(24→48→96ch)로 구성해 파라미터 수를 줄임.
분류 헤드는 Dropout(0.3) → Linear(128) → ReLU → Linear(num_classes).

지식 증류(Knowledge Distillation)
Teacher를 50 epoch 동안 단독 학습해 고성능 모델을 먼저 확보.
Student는 teacher logits을 활용한 soft loss와 실제 라벨 hard loss를 혼합하여 학습:
L_soft: KLDivLoss(teacher/student softmax, temperature=4.0)
L_hard: CrossEntropyLoss(student logits vs labels)
최종 손실: α·L_hard + (1-α)·L_soft (α=0.7)

Teacher와 Student 모두에 대해 validation set accuracy 및 classification_report 출력

loss 부분이 검색해서 설명하는 부분이 식이 달라서 혼동이 왔음.
L=α⋅L hard+(1−α)⋅L
L=α⋅Lsoft+(1−α)⋅Lhard

​Hard Loss (CrossEntropy)
학생 모델의 출력과 실제 레이블에 기반
오직 정답 클래스에만 집중하므로 “어떤 클래스가 맞다”는 강한 신호는 주지만, 비정답 클래스 사이의 미묘한 관계를 알려주진 못함
Soft Loss (KL Divergence)
Teacher 모델의 softmax 분포와 학생 모델의 softmax 분포를 직접 비교
Teacher가 “정답 외의 다른 클래스는 어느 정도 가능성이 있는지”까지 알려 주기 때문에, 클래스 간 유사성이나 패턴을 학생도 학습
핵심은 “어느 쪽 손실에 가중치를 더 둘지”에 관한 선택