In [1]:
import h5py
import random
import pandas as pd
import numpy as np
import os
import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as trasnforms

from tqdm.auto import tqdm

from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

import matplotlib.pyplot as plt

In [2]:
# Use the first CUDA GPU when one is available, otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
# Hyper-parameters read by the training cells of this notebook.
CFG = dict(
    EPOCHS=100,          # number of training epochs
    LEARNING_RATE=1e-4,  # learning rate handed to the optimizer
    BATCH_SIZE=16,       # mini-batch size of every DataLoader
    SEED=41,             # value handed to seed_everything
)

In [4]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, sets the ``PYTHONHASHSEED`` environment
    variable, seeds NumPy's global generator and PyTorch (CPU and every CUDA
    device), and configures cuDNN for deterministic execution.

    Args:
        seed (int): Seed value applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without CUDA; covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick algorithms per run, which
    # undermines the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # fix every RNG seed before any data/model code runs

In [5]:
# NOTE(review): absolute, machine-specific paths; adjust them when running elsewhere.
all_df = pd.read_csv('/data/jyji/datasets/3D_NUMBER/train.csv') # label table for the training data
all_points= h5py.File('/data/jyji/datasets/3D_NUMBER/train.h5','r') # x, y, z point coordinates, opened read-only

In [6]:
# Plotly is used further down for an interactive 3D scatter plot of one sample.
from plotly.offline import iplot
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode
# Initialise plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=True)

In [7]:
# Peek at the first rows of the label table.
all_df.head()

Unnamed: 0,ID,label
0,0,5
1,1,0
2,2,4
3,3,1
4,4,9


In [8]:
# Shape of the point array stored under key '0' (rows are points, 3 columns).
np.array(all_points['0']).shape

(25700, 3)

In [9]:
# Label stored in the first row of the label table.
idx = 0
all_df.iloc[idx].label

5

In [10]:
# Load the point array stored under key str(idx) into memory and inspect it.
idx = 0
data = np.array(all_points[str(idx)])
print(data)
print(data.shape)

[[ 0.15       -0.1094358   0.47305447]
 [ 0.15       -0.0969358   0.47305447]
 [ 0.15       -0.0844358   0.47305447]
 ...
 [ 0.15       -0.1094358  -0.50194553]
 [ 0.15       -0.1094358  -0.51444553]
 [ 0.15       -0.1094358  -0.52694553]]
(25700, 3)


In [11]:
# Interactive 3D scatter plot of the point cloud loaded into `data`.
# The title is taken from the label table rather than a hard-coded digit, so
# it stays correct when `idx` changes.
# NOTE(review): assumes row position `idx` in all_df matches key str(idx) in
# the h5 file, as the lookup cells above do -- confirm.
layout = go.Layout(title = "Digit " + str(all_df.iloc[idx].label))
plot_data = go.Scatter3d(x = data[:,0], y = data[:,1], z = data[:,2],
                    mode = 'markers', marker = dict(size = 1))

fig = go.Figure(data = [plot_data],layout = layout)
fig.show()

In [12]:
# Ordered hold-out split: the first TRAIN_FRACTION of the rows train the model,
# the remaining rows are used for validation (no shuffling).
TRAIN_FRACTION = 0.8
n_train = int(len(all_df) * TRAIN_FRACTION)
train_df = all_df.iloc[:n_train]
val_df = all_df.iloc[n_train:]

In [13]:
class CustomDataset(Dataset):
    """Dataset that turns 3D point clouds into binary voxel occupancy grids.

    Each sample is read from ``point_list`` under the key ``str(id)``,
    randomly rotated when ``mode == 'train'``, voxelized by ``get_vector`` and
    returned as a tensor with a leading channel axis.

    Args:
        id_list: Indexable collection of sample ids.
        label_list: Indexable collection of labels aligned with ``id_list``,
            or ``None`` for unlabeled data.
        point_list: Mapping from ``str(id)`` to an ``(N, 3)`` point array
            (e.g. an open ``h5py.File``).
        mode: ``'train'`` (random rotation applied) or ``'val'`` (no
            augmentation).
    """

    def __init__(self, id_list, label_list, point_list,mode):
        # Validate before storing anything so a bad mode never leaves a
        # half-initialised object behind.
        assert mode =='train' or mode =='val'
        self.mode = mode
        self.id_list = id_list
        self.label_list = label_list
        self.point_list = point_list

    def __getitem__(self, index):
        """Return ``(voxels, label)``, or just ``voxels`` when there are no labels."""
        image_id = self.id_list[index]

        # Reading straight from the h5 file on every access can become an I/O
        # bottleneck and slow training down considerably.
        points = self.point_list[str(image_id)][:]
        voxels = torch.Tensor(self.get_vector(points)).unsqueeze(0)

        if self.label_list is None:
            return voxels
        return voxels, self.label_list[index]

    def __len__(self):
        """Number of samples, i.e. the length of ``id_list``."""
        return len(self.id_list)

    def get_vector(self, points, x_y_z=(16, 16, 16)):
        """Voxelize a point cloud into a binary occupancy grid.

        The cloud is enclosed in a cube (the longest axis extent plus a small
        margin), the cube is cut into ``x_y_z`` cells per axis, and every cell
        containing at least one point is set to 1.

        Args:
            points: ``(N, 3)`` array of x, y, z coordinates.
            x_y_z: Number of cells along x, y and z. Defaults to a 16^3 grid.

        Returns:
            numpy.ndarray: Float array of shape ``(n_z, n_y, n_x)`` holding
            0.0 / 1.0 occupancy values.

        Raises:
            TypeError: If any entry of ``x_y_z`` is not a plain ``int``.
        """
        for axis in range(3):
            if type(x_y_z[axis]) is not int:
                raise TypeError("x_y_z[{}] must be int".format(axis))
        n_x, n_y, n_z = x_y_z[0], x_y_z[1], x_y_z[2]

        if self.mode == 'train':
            points = self.rotation(points)
        points = np.asarray(points)

        # Bounding box with a small margin so no point lies exactly on the
        # outer edges of the grid.
        xyzmin = np.min(points, axis=0) - 0.001
        xyzmax = np.max(points, axis=0) + 0.001

        # Grow the shorter axes symmetrically so the box becomes a cube and
        # the aspect ratio of the shape is preserved.
        extent = xyzmax - xyzmin
        pad = extent.max() - extent
        xyzmin = xyzmin - pad / 2
        xyzmax = xyzmax + pad / 2

        # n cells along an axis need n + 1 edges.
        edges = [
            np.linspace(xyzmin[axis], xyzmax[axis], num=cells + 1)
            for axis, cells in enumerate((n_x, n_y, n_z))
        ]
        ix = np.searchsorted(edges[0], points[:, 0]) - 1
        iy = np.searchsorted(edges[1], points[:, 1]) - 1
        iz = np.searchsorted(edges[2], points[:, 2]) - 1

        # Flat voxel index: i = ((y * n_x) + x) + (z * (n_x * n_y))
        flat_index = ((iy * n_x) + ix) + (iz * (n_x * n_y))

        # minlength pads the histogram to the full grid, so trailing empty
        # voxels need no manual zero-fill.
        counts = np.bincount(flat_index, minlength=n_x * n_y * n_z)
        vector = counts.astype(float).reshape(n_z, n_y, n_x)

        # Clip the counts to {0, 1}: only occupancy is kept, not density.
        return np.clip(vector, 0, 1)

    def rotation(self, points):
        """Rotate the cloud by random angles about the z, y and x axes.

        Each angle is drawn independently from ``[0, 0.5 * pi)`` using NumPy's
        global random generator; the rotations are applied in z, y, x order.

        Args:
            points: ``(N, 3)`` array-like of x, y, z coordinates.

        Returns:
            numpy.ndarray: Rotated ``(N, 3)`` array.
        """
        points = np.array(points)
        x, y, z = points[:, 0], points[:, 1], points[:, 2]
        low, high = 0, .5

        # Rotation about the z axis.
        theta = np.random.uniform(low, high) * np.pi
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        x, y = x * cos_t - y * sin_t, x * sin_t + y * cos_t

        # Rotation about the y axis.
        theta = np.random.uniform(low, high) * np.pi
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        x, z = x * cos_t + z * sin_t, - x * sin_t + z * cos_t

        # Rotation about the x axis.
        theta = np.random.uniform(low, high) * np.pi
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        y, z = y * cos_t - z * sin_t, y * sin_t + z * cos_t

        return np.stack([x, y, z], axis=-1)

In [14]:
# Disabled torchvision transform pipeline, kept for reference only.
# NOTE(review): it mixes the names `transforms` and `trasnforms`; fix the names before re-enabling it.
# transform = transforms.Compose([
#     trasnforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5))
# ])

# Training set: 'train' mode enables the random rotation in CustomDataset; batches are shuffled.
train_dataset = CustomDataset(train_df['ID'].values, train_df['label'].values, all_points, 'train')
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

# Validation set: 'val' mode skips the rotation; order is kept fixed.
val_dataset = CustomDataset(val_df['ID'].values, val_df['label'].values, all_points, 'val')
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [15]:
class BaseModel(nn.Module):
    """3D CNN classifier with a spatial-transformer (STN) front end.

    The input volume is first resampled by a learned 3D affine transform
    (``stn``), then passed through a small 3D convolutional feature extractor
    and a linear classifier producing 10 logits.

    Shape comments below assume a ``(N, 1, 16, 16, 16)`` input, which is what
    the hard-coded ``32 * 3 * 3 * 3`` and ``64`` feature sizes require.
    """

    def __init__(self):
        super().__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(1, 8, 3),      # (N, 8, 14, 14, 14)
            nn.ReLU(inplace=True),
            nn.BatchNorm3d(8),
            nn.Conv3d(8, 32, 3),     # (N, 32, 12, 12, 12)
            nn.ReLU(inplace=True),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(4),         # (N, 32, 3, 3, 3)
            nn.Conv3d(32, 64, 3),    # (N, 64, 1, 1, 1)
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Linear(64, 512),
            nn.Dropout(0.1),
            nn.Linear(512, 10)
        )

        # Spatial transformer localization network: consumes the input volume
        # and produces the features from which the affine parameters theta
        # are regressed.
        self.localization = nn.Sequential(
            nn.Conv3d(1, 8, kernel_size=3, padding=1),  # (N, 8, 16, 16, 16)
            nn.MaxPool3d(2, stride=2),                  # (N, 8, 8, 8, 8)
            nn.ReLU(inplace=True),
            nn.Conv3d(8, 32, kernel_size=3),            # (N, 32, 6, 6, 6)
            nn.MaxPool3d(2, stride=2),                  # (N, 32, 3, 3, 3)
            nn.ReLU(inplace=True)
        )

        # Regressor for the 3 x 4 affine matrix (12 values per sample).
        self.fc_loc = nn.Sequential(
            nn.Linear(32 * 3 * 3 * 3, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 4 * 3)
        )

        # Start from the identity transform: zero weights plus an identity
        # bias make the STN a no-op before training.
        with torch.no_grad():
            self.fc_loc[-1].weight.zero_()
            self.fc_loc[-1].bias.copy_(torch.eye(3, 4).flatten())

    def stn(self, x):
        """Resample ``x`` with the affine transform predicted from ``x`` itself.

        Args:
            x: Input volume; the flattening below requires the localization
                output to be ``32 * 3 * 3 * 3`` features per sample.

        Returns:
            Tensor with the same size as ``x``.
        """
        xs = self.localization(x)
        xs = xs.view(-1, 32 * 3 * 3 * 3)
        theta = self.fc_loc(xs).view(-1, 3, 4)

        # align_corners is passed explicitly (False is the library default) so
        # both calls are guaranteed to agree and no default-change warning is
        # emitted.
        grid = F.affine_grid(theta, x.size(), align_corners=False)
        return F.grid_sample(x, grid, align_corners=False)

    def forward(self,x):
        """Return class logits of shape ``(N, 10)`` for the input volume ``x``."""
        x = self.stn(x)

        x = self.feature_extract(x)
        x = x.view(x.size()[0], -1)
        return self.classifier(x)

In [16]:
# Push a random batch through a conv/pool stack one layer at a time and print
# the tensor size after every layer.
x = torch.randn(16, 1, 16, 16, 16)
shape_probe = (
    ('conv3d(1, 8, 3) :', nn.Conv3d(1, 8, 3, padding=1)),
    ('maxpooling(2, stride=2) :', nn.MaxPool3d(2, stride=2)),
    ('conv3d(8, 10, 3) :', nn.Conv3d(8, 10, 3)),
    ('maxpooling(2, stride=2) :', nn.MaxPool3d(2, stride=2)),
)
for tag, net in shape_probe:
    x = net(x)
    print(tag, x.size())

conv3d(1, 8, 3) : torch.Size([16, 8, 16, 16, 16])
maxpooling(2, stride=2) : torch.Size([16, 8, 8, 8, 8])
conv3d(8, 10, 3) : torch.Size([16, 10, 6, 6, 6])
maxpooling(2, stride=2) : torch.Size([16, 10, 3, 3, 3])


In [17]:
# Trace tensor sizes through a conv / ReLU / batch-norm stack, printing the
# size after each of the first three stages.
# NOTE(review): the last conv here has 32 output channels, while
# BaseModel.feature_extract uses Conv3d(32, 64, 3) -- confirm which is intended.
x = torch.randn(16, 1, 16, 16, 16)
probe_stages = [
    [nn.Conv3d(1, 8, 3), nn.ReLU(inplace=True), nn.BatchNorm3d(8)],
    [nn.Conv3d(8, 32, 3), nn.ReLU(inplace=True), nn.BatchNorm3d(32)],
    [nn.MaxPool3d(4)],
]
for stage in probe_stages:
    for layer in stage:
        x = layer(x)
    print(x.size())

for layer in (nn.Conv3d(32, 32, 3), nn.ReLU(inplace=True)):
    x = layer(x)
x.size()

torch.Size([16, 8, 14, 14, 14])
torch.Size([16, 32, 12, 12, 12])
torch.Size([16, 32, 3, 3, 3])


torch.Size([16, 32, 1, 1, 1])

In [18]:
def train(model, optimizer, train_loader, val_loader, scheduler, device,
          epochs=None, save_path='./best_model.pth'):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    For every epoch the model is optimised over ``train_loader`` with
    cross-entropy loss, the scheduler (if any) is stepped once, and the model
    is evaluated with ``validation``. Whenever the validation accuracy
    strictly improves, ``model.state_dict()`` is written to ``save_path``.

    Args:
        model: Network to train; moved to ``device`` in place.
        optimizer: Optimizer built over ``model.parameters()``.
        train_loader: Iterable of ``(data, label)`` training batches.
        val_loader: Iterable of ``(data, label)`` validation batches.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``.
        device: Device on which batches and the model are placed.
        epochs: Number of epochs; defaults to ``CFG['EPOCHS']`` when ``None``.
        save_path: Destination of the best checkpoint.

    Returns:
        The best validation accuracy observed (0 if it never exceeded 0, in
        which case no checkpoint is written).
    """
    if epochs is None:
        epochs = CFG['EPOCHS']

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    best_score = 0
    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        for data, label in tqdm(iter(train_loader)):
            data, label = data.float().to(device), label.long().to(device)
            optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        if scheduler is not None:
            scheduler.step()

        val_loss, val_acc = validation(model, criterion, val_loader, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss)}] Val Loss : [{val_loss}] Val ACC : [{val_acc}]')

        # Keep only the checkpoint with the highest validation accuracy so far.
        if best_score < val_acc:
            best_score = val_acc
            torch.save(model.state_dict(), save_path)

    return best_score

In [19]:
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Loss function called as ``criterion(logits, label)``.
        val_loader: Iterable of ``(data, label)`` batches.
        device: Device on which batches are placed.

    Returns:
        tuple: ``(mean of the per-batch losses, accuracy over all samples)``.
    """
    model.eval()
    batch_losses, predicted, expected = [], [], []
    with torch.no_grad():
        for data, label in tqdm(iter(val_loader)):
            data = data.float().to(device)
            label = label.long().to(device)

            logits = model(data)
            batch_losses.append(criterion(logits, label).item())

            # Collect plain Python ints so accuracy is computed over the whole set.
            predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
            expected.extend(label.detach().cpu().numpy().tolist())

    return np.mean(batch_losses), accuracy_score(expected, predicted)


In [1]:
# Build a fresh model and optimizer, then run the full training loop.
# No model.eval() here: train() calls model.train() at the start of every
# epoch, so switching to eval mode beforehand was misleading and had no effect.
model = BaseModel()
optimizer = optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = None

train(model, optimizer, train_loader, val_loader, scheduler, device)

NameError: name 'BaseModel' is not defined

In [None]:
# Submission template (provides the test IDs) and the test-set point data, opened read-only.
# NOTE(review): absolute, machine-specific paths; adjust them when running elsewhere.
test_df = pd.read_csv('/data/jyji/datasets/3D_NUMBER/sample_submission.csv')
test_points = h5py.File('/data/jyji/datasets/3D_NUMBER/test.h5', 'r')

In [None]:
# Unlabeled test set: labels are None, and 'val' mode disables the random rotation.
test_dataset = CustomDataset(test_df['ID'].values, None, test_points, 'val')
# shuffle=False keeps predictions in the same order as test_df.
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
# Reload the best weights written during training.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved from a GPU run also loads on a CPU-only machine.
checkpoint = torch.load('./best_model.pth', map_location=device)
model = BaseModel()
model.load_state_dict(checkpoint)
model.eval()

BaseModel(
  (feature_extract): Sequential(
    (0): Conv3d(1, 8, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (1): ReLU(inplace=True)
    (2): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv3d(8, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (4): ReLU(inplace=True)
    (5): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool3d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (7): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (8): ReLU(inplace=True)
  )
  (classifier): Linear(in_features=32, out_features=10, bias=True)
  (localization): Sequential(
    (0): Conv3d(1, 8, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv3d(8, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (4): MaxPool3d(kernel_size=2, stride=2, padding=0, dilat

In [None]:
def predict(model, test_loader, device):
    """Return the predicted class index for every sample in ``test_loader``.

    Args:
        model: Trained network; moved to ``device`` and put in eval mode.
        test_loader: Iterable yielding batches of input tensors (no labels).
        device: Device on which inference runs.

    Returns:
        list: Predicted class indices, in loader order.
    """
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            logits = model(batch.float().to(device))
            # The arg-max over the class axis is the predicted label.
            predictions.extend(logits.argmax(1).detach().cpu().numpy().tolist())

    return predictions

In [None]:
# Run inference over the whole test set with the reloaded best model.
preds = predict(model, test_loader, device)

  0%|          | 0/2500 [00:00<?, ?it/s]

In [None]:
# Write the predictions into the submission table and save it without the index column.
test_df['label'] = preds
test_df.to_csv('./submit.csv', index=False)

### 실험 1
Feature normalization, STN, Rotation  

```python
        # Spatial transformer localization-network
        # input featuremap 이 들어가서 transformation parameter theta를 뽑아낸다. 
        self.localization = nn.Sequential(
            nn.Conv3d(1, 8, kernel_size=3, padding=1),
            nn.MaxPool3d(2, stride=2),
            nn.ReLU(True),
            nn.Conv3d(8, 10, kernel_size=3), 
            nn.MaxPool3d(2, stride=2),
            nn.ReLU(True)
        )

        # Regressor for the 3 * 4 affine matrix (12 parameters for the 3D transform)
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3 * 3, 128),
            nn.ReLU(True),
            nn.Linear(128, 4 * 3)
        )

        # Initialize the weights/bias with identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.eye(3, 4).view(1, -1).squeeze())

    # Spatial transformer network forward function
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3 * 3)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 3, 4) #theta가 2,3행렬 ->3,4

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x
```

- Epoch : [1] Train Loss : [1.0433941685169936] Val Loss : [1.6961502247810363] Val ACC : [0.3992]
- Epoch : [2] Train Loss : [0.41077621029019357] Val Loss : [0.6488987791657448] Val ACC : [0.7867]
- Epoch : [3] Train Loss : [0.2955318866282701] Val Loss : [0.6096185804128647] Val ACC : [0.8026]  
- Epoch : [4] Train Loss : [0.2538589173272252] Val Loss : [0.6137630719900131] Val ACC : [0.7949]  
- Epoch : [5] Train Loss : [0.2212211425559595] Val Loss : [0.6799417055368423] Val ACC : [0.7805]  
- Epoch : [6] Train Loss : [0.20211621579620986] Val Loss : [0.6343584134340287] Val ACC : [0.7953]  
- Epoch : [7] Train Loss : [0.1875702461127192] Val Loss : [0.5718951389789582] Val ACC : [0.8181]  
- Epoch : [8] Train Loss : [0.17010618286160753] Val Loss : [0.46725307679474354] Val ACC : [0.858]  
- Epoch : [9] Train Loss : [0.1573759648042731] Val Loss : [0.5456441253364086] Val ACC : [0.8287]  
- Epoch : [10] Train Loss : [0.15026631673905066] Val Loss : [0.5265158689022065] Val ACC : [0.8361] 

80퍼 쯤에서 수렴하고 들쭉날쭉한 모습  
normalization은 별로 도움이 안되는듯 하다  


### 실험 2
feature clipping, stn, rotation  
코드 위와 같음  
- Epoch : [1] Train Loss : [1.1796915194749833] Val Loss : [1.8820237781524658] Val ACC : [0.3647]
- Epoch : [2] Train Loss : [0.5416597831100225] Val Loss : [1.5784625809669495] Val ACC : [0.486]
- Epoch : [3] Train Loss : [0.4066094972215593] Val Loss : [1.3008029669761658] Val ACC : [0.5681]
- Epoch : [4] Train Loss : [0.33310327495783565] Val Loss : [1.048465249824524] Val ACC : [0.6509]
- Epoch : [5] Train Loss : [0.2834464996729046] Val Loss : [1.4537052973747253] Val ACC : [0.575]
- Epoch : [6] Train Loss : [0.23931061743199825] Val Loss : [0.8034082006931305] Val ACC : [0.7376]
- Epoch : [7] Train Loss : [0.2142291997767985] Val Loss : [0.7064691490530968] Val ACC : [0.7733]
- Epoch : [8] Train Loss : [0.19037675377763807] Val Loss : [0.735043149960041] Val ACC : [0.7748]
- Epoch : [9] Train Loss : [0.17945399615298957] Val Loss : [0.7377542537808418] Val ACC : [0.7754]
- Epoch : [10] Train Loss : [0.16311595387123526] Val Loss : [0.6841550967574119] Val ACC : [0.7827]

아직 덜 학습된 것으로 보아 조금 더 돌려볼 필요가 있는듯 하다


