## Import

In [5]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import random

In [2]:
# GPU 사용 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device(type='cpu')

## Data Load

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(8) # Seed 고정

In [7]:
import os

class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): csv 파일의 경로.
            transform (callable, optional): 샘플에 적용될 Optional transform.
        """
        self.df = pd.read_csv(csv_file)
        self.transform = transform
        self.img_dir = os.path.dirname(csv_file)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_filename = self.df['img_path'].iloc[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        target = torch.tensor([0.]).float()
        return image, target


# 이미지 전처리 및 임베딩
train_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_data = CustomDataset(csv_file='/content/drive/MyDrive/Project/반도체 소자/train.csv', transform=train_transform)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

In [11]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=512, out_features=1, bias=True)
model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=0.00001)

In [12]:
def train(model, train_loader, criterion, optimizer, scheduler, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        total = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels.view(-1, 1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predictions = (torch.sigmoid(outputs) > 0.5).float()
            running_corrects += torch.sum(predictions == labels.view(-1, 1)).item()
            total += labels.size(0)

        scheduler.step()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = running_corrects / total

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

In [13]:
# 모델 학습 실행
train(model, train_loader, criterion, optimizer, scheduler, num_epochs=4)

Epoch 1/4, Loss: 0.2646, Accuracy: 0.8075
Epoch 2/4, Loss: 0.0057, Accuracy: 1.0000
Epoch 3/4, Loss: 0.0017, Accuracy: 1.0000
Epoch 4/4, Loss: 0.0009, Accuracy: 1.0000


## Export Embedding Vector

In [14]:
# 사전 학습된 모델 로드
model.eval()  # 추론 모드로 설정

# 특성 추출을 위한 모델의 마지막 레이어 수정
model = torch.nn.Sequential(*(list(model.children())[:-1]))

model.to(device)

# 이미지를 임베딩 벡터로 변환
def get_embeddings(dataloader, model):
    embeddings = []
    with torch.no_grad():
        for images, _ in tqdm(dataloader):
            images = images.to(device)
            emb = model(images)
            embeddings.append(emb.cpu().numpy().squeeze())
    return np.concatenate(embeddings, axis=0)

train_embeddings = get_embeddings(train_loader, model)

100%|██████████| 7/7 [02:11<00:00, 18.82s/it]


## Anomaly Detection Model Fitting

In [38]:
from sklearn.metrics import make_scorer, f1_score

# Isolation Forest 모델 학습
clf = IsolationForest(random_state=8,
                      contamination=0.16)
clf.fit(train_embeddings)

## Inference

In [39]:
test_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
test_data = CustomDataset(csv_file='/content/drive/MyDrive/Project/반도체 소자/test.csv', transform=test_transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

test_embeddings = get_embeddings(test_loader, model)
test_pred = clf.predict(test_embeddings)

# Isolation Forest의 예측 결과(이상 = -1, 정상 = 1)를 이상 = 1, 정상 = 0으로 변환
test_pred = np.where(test_pred == -1, 1, 0)

100%|██████████| 4/4 [01:05<00:00, 16.50s/it]


## Submission

In [44]:
submit = pd.read_csv('/content/drive/MyDrive/Project/반도체 소자/sample_submission.csv')
submit['label'] = test_pred
submit['label'].value_counts()

Unnamed: 0,id,label
70,TEST_070,0
71,TEST_071,0
72,TEST_072,0
73,TEST_073,0
74,TEST_074,1
75,TEST_075,0
76,TEST_076,0
77,TEST_077,1
78,TEST_078,0
79,TEST_079,1


In [41]:
submit.to_csv('/content/drive/MyDrive/Project/반도체 소자/seed85842.csv', index = False)