## Import

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd "/content/drive/MyDrive/data/3d data"
#!unzip -q "/content/drive/MyDrive/data/3d data/open.zip"

In [None]:
import h5py # .h5 파일을 읽기 위한 패키지
import random
import pandas as pd
import numpy as np
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from plotly.offline import iplot
from utils import EarlyStopping
from tqdm.auto import tqdm

from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [None]:
CFG = {
    'EPOCHS':10,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':64,
    'SEED':41,
    'PATIENCE':10,
}

## Fixed RandomSeed

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [None]:
all_df = pd.read_csv('./train.csv')
all_points = h5py.File('./train.h5', 'r')

In [None]:
train_df = all_df.iloc[:int(len(all_df)*0.8)]
val_df = all_df.iloc[int(len(all_df)*0.8):]

## CustomDataset

In [None]:
class CustomDataset(Dataset):
    def __init__(self, id_list, label_list, point_list):
        self.id_list = id_list
        self.label_list = label_list
        self.point_list = point_list
        
    def __getitem__(self, index):
        image_id = self.id_list[index]
        
        # h5파일을 바로 접근하여 사용하면 학습 속도가 병목 현상으로 많이 느릴 수 있습니다.
        points = self.point_list[str(image_id)][:]
        image = self.get_vector(points)
        
        if self.label_list is not None:
            label = self.label_list[index]
            return torch.Tensor(image).unsqueeze(0), label
        else:
            return torch.Tensor(image).unsqueeze(0)
    
    def get_vector(self, points, x_y_z=[16, 16, 16]):
        # 3D Points -> [16,16,16]
        xyzmin = np.min(points, axis=0) - 0.001
        xyzmax = np.max(points, axis=0) + 0.001

        diff = max(xyzmax-xyzmin) - (xyzmax-xyzmin)
        xyzmin = xyzmin - diff / 2
        xyzmax = xyzmax + diff / 2

        segments = []
        shape = []

        for i in range(3):
            # note the +1 in num 
            if type(x_y_z[i]) is not int:
                raise TypeError("x_y_z[{}] must be int".format(i))
            s, step = np.linspace(xyzmin[i], xyzmax[i], num=(x_y_z[i] + 1), retstep=True)
            segments.append(s)
            shape.append(step)

        n_voxels = x_y_z[0] * x_y_z[1] * x_y_z[2]
        n_x = x_y_z[0]
        n_y = x_y_z[1]
        n_z = x_y_z[2]

        structure = np.zeros((len(points), 4), dtype=int)
        structure[:,0] = np.searchsorted(segments[0], points[:,0]) - 1
        structure[:,1] = np.searchsorted(segments[1], points[:,1]) - 1
        structure[:,2] = np.searchsorted(segments[2], points[:,2]) - 1

        # i = ((y * n_x) + x) + (z * (n_x * n_y))
        structure[:,3] = ((structure[:,1] * n_x) + structure[:,0]) + (structure[:,2] * (n_x * n_y)) 

        vector = np.zeros(n_voxels)
        count = np.bincount(structure[:,3])
        vector[:len(count)] = count

        vector = vector.reshape(n_z, n_y, n_x)
        return vector

    def __len__(self):
        return len(self.id_list)

In [None]:
train_dataset = CustomDataset(train_df['ID'].values,
                              train_df['label'].values,
                              all_points)

train_loader = DataLoader(train_dataset, 
                          batch_size = CFG['BATCH_SIZE'], 
                          shuffle=True,
                          pin_memory=True,    
                          num_workers=0)

val_dataset = CustomDataset(val_df['ID'].values, 
                            val_df['label'].values,
                            all_points)

val_loader = DataLoader(val_dataset,
                        batch_size=CFG['BATCH_SIZE'],
                        pin_memory=True,    
                        shuffle=False, num_workers=0)

## Model Define

In [None]:
class BaseModel(nn.Module):
    def __init__(self):
        super(BaseModel,self).__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(1,8,3),
            nn.ReLU(),
            nn.BatchNorm3d(8),
            nn.Conv3d(8,32,3),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(4),
            nn.Conv3d(32,32,3),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(32,10)

    def forward(self,x):
        x = self.feature_extract(x)
        x = x.view(x.size()[0],-1)
        x = self.classifier(x)
        return x

## Train

In [None]:
use_amp = True
save_path='best_model.pt'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
early_stopping = EarlyStopping(patience = CFG['PATIENCE'], verbose = True, path =save_path )

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    best_score = 0
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for data, label in tqdm(iter(train_loader)):
            data, label = data.float().to(device), label.long().to(device)
            optimizer.zero_grad()
            
            output = model(data)
            loss = criterion(output, label)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
                       
            train_loss.append(loss.item())
        
        if scheduler is not None:
            scheduler.step()
            
        val_loss, val_acc = validation(model, criterion, val_loader, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss)}] Val Loss : [{val_loss}] Val ACC : [{val_acc}]')
        early_stopping(-val_acc, model)
        if early_stopping.early_stop:
          print("Early stopping")
          break


In [None]:
def validation(model, criterion, val_loader, device):
    model.eval()
    true_labels = []
    model_preds = []
    val_loss = []
    with torch.no_grad():
        for data, label in tqdm(iter(val_loader)):
            data, label = data.float().to(device), label.long().to(device)
            
            model_pred = model(data)
            loss = criterion(model_pred, label)
            
            val_loss.append(loss.item())
            
            model_preds += model_pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += label.detach().cpu().numpy().tolist()
    
    return np.mean(val_loss), accuracy_score(true_labels, model_preds)

## Run!!

In [35]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,T_max=100, eta_min=0.001)

train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.26012198937051] Val Loss : [0.14489717888254672] Val ACC : [0.9538]
EarlyStopping counter: 5 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.11341217293827795] Val Loss : [0.11077103664539754] Val ACC : [0.9659]
EarlyStopping counter: 6 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.08883776309409877] Val Loss : [0.12169625080619008] Val ACC : [0.9634]
EarlyStopping counter: 7 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.07479553294745855] Val Loss : [0.10661227523589041] Val ACC : [0.9694]
EarlyStopping counter: 8 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.061765143726574025] Val Loss : [0.0907424981529155] Val ACC : [0.9745]
Validation loss decreased (-0.974500 --> -0.974500).  Saving model ...


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.054519636883867496] Val Loss : [0.11622381287440949] Val ACC : [0.9708]
EarlyStopping counter: 1 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.047141001965533356] Val Loss : [0.1022269350266346] Val ACC : [0.9749]
Validation loss decreased (-0.974500 --> -0.974900).  Saving model ...


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.04083448157600142] Val Loss : [0.10001304563867598] Val ACC : [0.9729]
EarlyStopping counter: 1 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.0360892931938431] Val Loss : [0.10658134150555706] Val ACC : [0.9737]
EarlyStopping counter: 2 out of 10


  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.03114886578069959] Val Loss : [0.10121469833154442] Val ACC : [0.9756]
Validation loss decreased (-0.974900 --> -0.975600).  Saving model ...
