<a href="https://colab.research.google.com/github/YoonDosik/Semiconductor_DACON/blob/master/Private_40th.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.nn.functional as F
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import random
import os

In [None]:
import os

# Select the GPU through environment variables: enumerate devices in PCI bus
# order and expose only the device at index 1 to this process.
# NOTE(review): on a host with a single GPU (index 0 only) this presumably hides
# the only GPU, so the later `torch.cuda.is_available()` check would fall back
# to CPU -- confirm the target machine really has a second GPU.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
    }
)

In [None]:
# Pick the compute device: CUDA when PyTorch reports one as available, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Mount Google Drive at /content/gdrive; the dataset and output paths used in
# this notebook all live under that mount point.
# NOTE(review): `google.colab` is presumably only importable inside Google
# Colab -- confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
def seed_everything(seed):
    """Seed every random number source this notebook touches.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (``torch.manual_seed`` and ``torch.cuda.manual_seed``), and switches cuDNN
    to deterministic, non-benchmarking mode.

    Args:
        seed (int): value passed to every seeding call.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The individual generators are independent, so one loop seeds them all.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(41)  # fix the seed

Data Load

학습에 필요한 데이터를 불러오고, 전처리를 진행합니다.

In [None]:
# Dataset definition
class CustomDataset(Dataset):
    """Image dataset driven by a CSV file that contains an ``img_path`` column.

    Every sample is returned together with a constant target of ``0.0``; no
    label column is read from the CSV.
    """

    # Folder that the ``img_path`` entries are resolved against by default.
    DEFAULT_ROOT = "/content/gdrive/My Drive/Colab_Data/semiconductor_anomaly"

    def __init__(self, csv_file, transform=None, root_dir=None):
        """
        Args:
            csv_file (string): path to the CSV file.
            transform (callable, optional): optional transform applied to each image.
            root_dir (string, optional): folder prepended to every ``img_path``
                after its first character has been dropped. Defaults to
                ``DEFAULT_ROOT``.
        """
        self.df = pd.read_csv(csv_file)
        self.transform = transform
        self.root_dir = self.DEFAULT_ROOT if root_dir is None else root_dir

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        The image is opened, converted to RGB, and passed through
        ``self.transform`` when one was given. ``target`` is always a float
        tensor of shape ``(1,)`` holding ``0.0``.
        """
        img_path = self.df['img_path'].iloc[idx]
        # img_path[1:] drops the first character of the stored path
        # (presumably the leading "." of "./...") before joining it to the root.
        full_path = self.root_dir + img_path[1:]
        # The context manager releases the file handle; converting to RGB keeps
        # grayscale / palette / RGBA files compatible with 3-channel transforms.
        with Image.open(full_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        target = torch.tensor([0.]).float()
        return image, target

# Image preprocessing pipeline and the training dataset / loader built on it
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.CenterCrop((224, 224)),        # crop to 224 x 224
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),   # random augmentation
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(preprocessing_steps)

TRAIN_CSV = '/content/gdrive/My Drive/Colab_Data/semiconductor_anomaly/train.csv'
train_data = CustomDataset(csv_file=TRAIN_CSV, transform=transform)
train_loader = DataLoader(train_data, batch_size=16, shuffle=False)

In [None]:
# Pretrained ResNet-18 whose `fc` layer is replaced by a single-logit head.
backbone = models.resnet18(pretrained=True)
backbone.fc = nn.Linear(in_features=512, out_features=1, bias=True)
model = backbone.to(device)

# Binary cross-entropy on raw logits, AdamW, and a cosine learning-rate schedule.
# NOTE(review): T_max is 100 while the training cell runs 20 epochs, so only the
# first part of the cosine curve is ever used -- confirm this is intended.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-5)



In [None]:
def train(model, train_loader, criterion, optimizer, scheduler, device, num_epochs):
    """Run a plain supervised training loop and print per-epoch metrics.

    Args:
        model: network called on each image batch.
        train_loader: iterable of ``(images, labels)`` batches; must support ``len()``.
        criterion: loss called as ``criterion(outputs, labels.view(-1, 1))``.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        device: device the batches are moved to.
        num_epochs (int): number of passes over ``train_loader``.

    Prints one line per epoch with the mean batch loss and the accuracy obtained
    by thresholding ``sigmoid(outputs)`` at 0.5. Returns ``None``.
    """
    for epoch_idx in range(num_epochs):
        model.train()
        loss_sum, n_correct, n_seen = 0.0, 0, 0

        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(device)
            targets = batch_labels.to(device).view(-1, 1)

            logits = model(batch_images)
            batch_loss = criterion(logits, targets)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Bookkeeping for the epoch summary.
            loss_sum += batch_loss.item()
            hits = (torch.sigmoid(logits) > 0.5).float() == targets
            n_correct += torch.sum(hits).item()
            n_seen += batch_labels.size(0)

        scheduler.step()

        epoch_loss = loss_sum / len(train_loader)
        epoch_acc = n_correct / n_seen

        print(f'Epoch {epoch_idx+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

In [None]:
# Fine-tune the model for 20 epochs with the loader, loss, optimizer and
# scheduler created in the earlier cells.
train(model, train_loader, criterion, optimizer, scheduler, device, num_epochs=20)

Epoch 1/20, Loss: 0.3157, Accuracy: 0.9906
Epoch 2/20, Loss: 0.1089, Accuracy: 1.0000
Epoch 3/20, Loss: 0.0402, Accuracy: 1.0000
Epoch 4/20, Loss: 0.0220, Accuracy: 1.0000
Epoch 5/20, Loss: 0.0153, Accuracy: 1.0000
Epoch 6/20, Loss: 0.0118, Accuracy: 1.0000
Epoch 7/20, Loss: 0.0096, Accuracy: 1.0000
Epoch 8/20, Loss: 0.0081, Accuracy: 1.0000
Epoch 9/20, Loss: 0.0070, Accuracy: 1.0000
Epoch 10/20, Loss: 0.0061, Accuracy: 1.0000
Epoch 11/20, Loss: 0.0054, Accuracy: 1.0000
Epoch 12/20, Loss: 0.0049, Accuracy: 1.0000
Epoch 13/20, Loss: 0.0044, Accuracy: 1.0000
Epoch 14/20, Loss: 0.0040, Accuracy: 1.0000
Epoch 15/20, Loss: 0.0036, Accuracy: 1.0000
Epoch 16/20, Loss: 0.0034, Accuracy: 1.0000
Epoch 17/20, Loss: 0.0031, Accuracy: 1.0000
Epoch 18/20, Loss: 0.0029, Accuracy: 1.0000
Epoch 19/20, Loss: 0.0027, Accuracy: 1.0000
Epoch 20/20, Loss: 0.0025, Accuracy: 1.0000


In [None]:
# Turn the fine-tuned classifier into a feature extractor by dropping its last
# child module (the `fc` head).
# Guard on `fc`: once the model has been rebuilt as an nn.Sequential it no
# longer has that attribute, so re-running this cell cannot strip a second layer.
if hasattr(model, "fc"):
    model = torch.nn.Sequential(*(list(model.children())[:-1]))

model = model.to(device)
model.eval()  # inference mode, applied to the module actually used for embeddings

# Convert images into embedding vectors
def get_embeddings(dataloader, model):
    """Run ``model`` over ``dataloader`` and stack the outputs row-wise.

    Args:
        dataloader: iterable of ``(images, _)`` batches; the second item is ignored.
        model: callable applied to each image batch after it is moved to the
            module-level ``device``.

    Returns:
        numpy.ndarray: the per-batch outputs concatenated along axis 0, with
        size-1 non-batch axes removed.
    """
    embeddings = []
    with torch.no_grad():
        for images, _ in tqdm(dataloader):
            images = images.to(device)
            emb = model(images).cpu().numpy()
            # Drop singleton axes but never the batch axis: an unqualified
            # squeeze() turns a batch of one into a 1-D vector, which then
            # cannot be concatenated with the other (2-D) batches.
            kept_shape = (emb.shape[0],) + tuple(d for d in emb.shape[1:] if d != 1)
            embeddings.append(emb.reshape(kept_shape))
    return np.concatenate(embeddings, axis=0)

# Embeddings should be deterministic, so feature extraction uses the same
# preprocessing as training minus the random horizontal flip.
eval_transform = transforms.Compose([
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training images, re-read without augmentation, give the reference embeddings.
train_eval_data = CustomDataset(csv_file='/content/gdrive/My Drive/Colab_Data/semiconductor_anomaly/train.csv', transform=eval_transform)
train_eval_loader = DataLoader(train_eval_data, batch_size=16, shuffle=False)
train_embeddings = get_embeddings(train_eval_loader, model)

# Test images go through the same deterministic pipeline before anomaly detection.
test_data = CustomDataset(csv_file='/content/gdrive/My Drive/Colab_Data/semiconductor_anomaly/test.csv', transform=eval_transform)
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)

test_embeddings = get_embeddings(test_loader, model)

100%|██████████| 14/14 [00:03<00:00,  4.23it/s]
100%|██████████| 7/7 [00:01<00:00,  4.65it/s]


In [None]:
!pip install pyod

Collecting pyod
  Downloading pyod-1.1.3.tar.gz (160 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/160.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/160.5 kB[0m [31m934.1 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━[0m [32m92.2/160.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.5/160.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyod
  Building wheel for pyod (setup.py) ... [?25l[?25hdone
  Created wheel for pyod: filename=pyod-1.1.3-py3-none-any.whl size=190251 sha256=551b1aeecee7069d67483edc128bc90c0c5d356bca33a0601f4c6eddcb049894
  Stored in directory: /root/.cache/pip/wheels/05/f8/db/124d43bec122d6ec0ab3713fadfe25ebed8af52ec561682b4e
Successful

In [None]:
from pyod.models.abod import ABOD

# Fit an ABOD detector (5 neighbours) on the training embeddings, then label
# every test embedding with it.
clf_name = 'ABOD'
abod = ABOD(n_neighbors=5).fit(train_embeddings)

abod_test_pred = abod.predict(test_embeddings)
print(abod_test_pred)


[0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 1 0 0
 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 0 0 0
 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 0 0 0 1]


Unnamed: 0,id,label
0,TEST_000,0
1,TEST_001,1
2,TEST_002,1
3,TEST_003,1
4,TEST_004,0


In [None]:
from pyod.models.kde import KDE

# Fit a KDE detector (bandwidth 2) on the training embeddings, then label
# every test embedding with it.
clf_name = "KDE"
kde = KDE(bandwidth=2).fit(train_embeddings)

kde_test_pred = kde.predict(test_embeddings)
print(kde_test_pred)

[1 1 0 1 0 1 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 0 1 1 0 1
 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0
 1 1 0 1 0 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 0 0 0 1 1]


In [None]:
# Consensus vote: keep a detector's label only where ABOD and KDE agree; any
# disagreement falls back to 0.
abod_votes = np.asarray(abod_test_pred)
kde_votes = np.asarray(kde_test_pred)
if abod_votes.shape != kde_votes.shape:
    # Fail loudly instead of raising IndexError mid-loop or silently ignoring
    # surplus predictions.
    raise ValueError(
        f"prediction shapes differ: ABOD {abod_votes.shape} vs KDE {kde_votes.shape}"
    )

# tolist() yields plain Python ints, so the printed list stays free of NumPy
# scalar reprs.
pred_label = np.where(abod_votes == kde_votes, abod_votes, 0).tolist()

# Fill the sample submission with the consensus labels and save it to Drive.
submit = pd.read_csv('/content/gdrive/My Drive/Colab_Data/semiconductor_anomaly/sample_submission.csv')
submit['label'] = pred_label
submit.to_csv('/content/gdrive/My Drive/Colab_Data/ABOD_KDE_result.csv', index=False)
print(pred_label)

[1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1]
