In [1]:
import h5py
import pandas as pd
import numpy as np
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random
import torch.nn.init
import matplotlib.pyplot as plt
import os
import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [3]:
CFG = {
    'EPOCHS':15,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':16,
    'SEED':41
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [5]:
all_df = pd.read_csv('C:/Users/김승우/Desktop/파이썬/dacon/3D MNIST Classification/train.csv')
all_points = h5py.File('C:/Users/김승우/Desktop/파이썬/dacon/3D MNIST Classification/train.h5', 'r')

all_points

<HDF5 file "train.h5" (mode r)>

In [6]:
train_df = all_df.iloc[:int(len(all_df)*0.8)]
val_df = all_df.iloc[int(len(all_df)*0.8):]

train_df.head()

Unnamed: 0,ID,label
0,0,5
1,1,0
2,2,4
3,3,1
4,4,9


In [7]:
class CustomDataset(Dataset):
    def __init__(self, id_list, label_list, point_list):
        self.id_list = id_list
        self.label_list = label_list
        self.point_list = point_list
        
    def __getitem__(self, index):
        image_id = self.id_list[index]
        
        # h5파일을 바로 접근하여 사용하면 학습 속도가 병목 현상으로 많이 느릴 수 있습니다.
        points = self.point_list[str(image_id)][:]
        image = self.get_vector(points)
        
        if self.label_list is not None:
            label = self.label_list[index]
            return torch.Tensor(image).unsqueeze(0), label
        else:
            return torch.Tensor(image).unsqueeze(0)
    
    def get_vector(self, points, x_y_z=[16, 16, 16]):
        # 3D Points -> [16,16,16]
        xyzmin = np.min(points, axis=0) - 0.001
        xyzmax = np.max(points, axis=0) + 0.001
        print("1번",np.min(points, axis=0),np.max(points, axis=0))
        print("2번",xyzmin, xyzmax)
        
        diff = max(xyzmax-xyzmin) - (xyzmax-xyzmin)
        print("3번",max(xyzmax-xyzmin),(xyzmax-xyzmin),diff)
        xyzmin = xyzmin - diff / 2
        xyzmax = xyzmax + diff / 2
        print("4번",xyzmin, xyzmax)
        
        segments = []
        shape = []

        for i in range(3):
            # note the +1 in num 
            if type(x_y_z[i]) is not int:
                raise TypeError("x_y_z[{}] must be int".format(i))
            s, step = np.linspace(xyzmin[i], xyzmax[i], num=(x_y_z[i] + 1), retstep=True)
            segments.append(s)
            shape.append(step)
            print("5_{}번".format(i),s)
            print("6_{}번".format(i),step)
            print("7_{}번".format(i),segments)
            print("8_{}번".format(i),shape)
        
        print("9번",x_y_z[0],x_y_z[1],x_y_z[2])
        n_voxels = x_y_z[0] * x_y_z[1] * x_y_z[2]
        n_x = x_y_z[0]
        n_y = x_y_z[1]
        n_z = x_y_z[2]
        print("10번",n_voxels,n_x,n_y,n_z)
        
        structure = np.zeros((len(points), 4), dtype=int)
        print("11번",points,structure)
        print("12번",segments[0],points[:,0])
        structure[:,0] = np.searchsorted(segments[0], points[:,0]) - 1
        structure[:,1] = np.searchsorted(segments[1], points[:,1]) - 1
        structure[:,2] = np.searchsorted(segments[2], points[:,2]) - 1
        print("13번",structure[:,0],structure[:,1],structure[:,2])
        
        # i = ((y * n_x) + x) + (z * (n_x * n_y))
        structure[:,3] = ((structure[:,1] * n_x) + structure[:,0]) + (structure[:,2] * (n_x * n_y)) 
        print("14번",structure[:,3])
        
        vector = np.zeros(n_voxels)
        print("15번",vector)
        count = np.bincount(structure[:,3])
        vector[:len(count)] = count
        print("16번",count,len(count),vector[:len(count)])
        
        vector = vector.reshape(n_z, n_y, n_x)
        print("17번",vector)
        return vector

    def __len__(self):
        return len(self.id_list)

In [8]:
# dataset loader
train_dataset = CustomDataset(train_df['ID'].values, train_df['label'].values, all_points)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df['ID'].values, val_df['label'].values, all_points)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [9]:
train_dataset[0]

1번 [-0.15       -0.5094358  -0.52694553] [0.15       0.4905642  0.47305447]
2번 [-0.151      -0.5104358  -0.52794553] [0.151      0.4915642  0.47405447]
3번 1.002 [0.302 1.002 1.002] [0.7 0.  0. ]
4번 [-0.501      -0.5104358  -0.52794553] [0.501      0.4915642  0.47405447]
5_0번 [-5.01000000e-01 -4.38375000e-01 -3.75750000e-01 -3.13125000e-01
 -2.50500000e-01 -1.87875000e-01 -1.25250000e-01 -6.26250000e-02
  5.55111512e-15  6.26250000e-02  1.25250000e-01  1.87875000e-01
  2.50500000e-01  3.13125000e-01  3.75750000e-01  4.38375000e-01
  5.01000000e-01]
6_0번 0.062625
7_0번 [array([-5.01000000e-01, -4.38375000e-01, -3.75750000e-01, -3.13125000e-01,
       -2.50500000e-01, -1.87875000e-01, -1.25250000e-01, -6.26250000e-02,
        5.55111512e-15,  6.26250000e-02,  1.25250000e-01,  1.87875000e-01,
        2.50500000e-01,  3.13125000e-01,  3.75750000e-01,  4.38375000e-01,
        5.01000000e-01])]
8_0번 [0.062625]
5_1번 [-0.5104358 -0.4478108 -0.3851858 -0.3225608 -0.2599358 -0.1973108
 -0.1346858 

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          ...,
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
       

In [10]:
train_dataset[0][0][0][0]

1번 [-0.15       -0.5094358  -0.52694553] [0.15       0.4905642  0.47305447]
2번 [-0.151      -0.5104358  -0.52794553] [0.151      0.4915642  0.47405447]
3번 1.002 [0.302 1.002 1.002] [0.7 0.  0. ]
4번 [-0.501      -0.5104358  -0.52794553] [0.501      0.4915642  0.47405447]
5_0번 [-5.01000000e-01 -4.38375000e-01 -3.75750000e-01 -3.13125000e-01
 -2.50500000e-01 -1.87875000e-01 -1.25250000e-01 -6.26250000e-02
  5.55111512e-15  6.26250000e-02  1.25250000e-01  1.87875000e-01
  2.50500000e-01  3.13125000e-01  3.75750000e-01  4.38375000e-01
  5.01000000e-01]
6_0번 0.062625
7_0번 [array([-5.01000000e-01, -4.38375000e-01, -3.75750000e-01, -3.13125000e-01,
       -2.50500000e-01, -1.87875000e-01, -1.25250000e-01, -6.26250000e-02,
        5.55111512e-15,  6.26250000e-02,  1.25250000e-01,  1.87875000e-01,
        2.50500000e-01,  3.13125000e-01,  3.75750000e-01,  4.38375000e-01,
        5.01000000e-01])]
8_0번 [0.062625]
5_1번 [-0.5104358 -0.4478108 -0.3851858 -0.3225608 -0.2599358 -0.1973108
 -0.1346858 

tensor([[ 0.,  0.,  0.,  0.,  0., 72., 72., 72., 72., 72., 72.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 63., 42., 42., 42., 42., 63.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 66., 66., 66., 66., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 31., 48., 48., 48., 48., 31.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 

In [11]:
train_dataset[30000]

1번 [-0.15       -0.42336601 -0.49395425] [0.15       0.37663399 0.50604575]
2번 [-0.151      -0.42436601 -0.49495425] [0.151      0.37763399 0.50704575]
3번 1.002 [0.302 0.802 1.002] [0.7 0.2 0. ]
4번 [-0.501      -0.52436601 -0.49495425] [0.501      0.47763399 0.50704575]
5_0번 [-5.01000000e-01 -4.38375000e-01 -3.75750000e-01 -3.13125000e-01
 -2.50500000e-01 -1.87875000e-01 -1.25250000e-01 -6.26250000e-02
  2.22044605e-15  6.26250000e-02  1.25250000e-01  1.87875000e-01
  2.50500000e-01  3.13125000e-01  3.75750000e-01  4.38375000e-01
  5.01000000e-01]
6_0번 0.062625
7_0번 [array([-5.01000000e-01, -4.38375000e-01, -3.75750000e-01, -3.13125000e-01,
       -2.50500000e-01, -1.87875000e-01, -1.25250000e-01, -6.26250000e-02,
        2.22044605e-15,  6.26250000e-02,  1.25250000e-01,  1.87875000e-01,
        2.50500000e-01,  3.13125000e-01,  3.75750000e-01,  4.38375000e-01,
        5.01000000e-01])]
8_0번 [0.062625]
5_1번 [-0.52436601 -0.46174101 -0.39911601 -0.33649101 -0.27386601 -0.21124101
 -0.14

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          ...,
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
       

In [12]:
train_dataset[30000][0][0][0]

1번 [-0.15       -0.42336601 -0.49395425] [0.15       0.37663399 0.50604575]
2번 [-0.151      -0.42436601 -0.49495425] [0.151      0.37763399 0.50704575]
3번 1.002 [0.302 0.802 1.002] [0.7 0.2 0. ]
4번 [-0.501      -0.52436601 -0.49495425] [0.501      0.47763399 0.50704575]
5_0번 [-5.01000000e-01 -4.38375000e-01 -3.75750000e-01 -3.13125000e-01
 -2.50500000e-01 -1.87875000e-01 -1.25250000e-01 -6.26250000e-02
  2.22044605e-15  6.26250000e-02  1.25250000e-01  1.87875000e-01
  2.50500000e-01  3.13125000e-01  3.75750000e-01  4.38375000e-01
  5.01000000e-01]
6_0번 0.062625
7_0번 [array([-5.01000000e-01, -4.38375000e-01, -3.75750000e-01, -3.13125000e-01,
       -2.50500000e-01, -1.87875000e-01, -1.25250000e-01, -6.26250000e-02,
        2.22044605e-15,  6.26250000e-02,  1.25250000e-01,  1.87875000e-01,
        2.50500000e-01,  3.13125000e-01,  3.75750000e-01,  4.38375000e-01,
        5.01000000e-01])]
8_0번 [0.062625]
5_1번 [-0.52436601 -0.46174101 -0.39911601 -0.33649101 -0.27386601 -0.21124101
 -0.14

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  7., 12., 12., 12., 12.,  7.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 49., 66., 66., 66., 66., 49.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 36., 36., 36., 36., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 72., 48., 48., 48., 48., 72.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 54., 66., 66., 66., 66., 54.,  0.,  0.,  0.,
          0.,  0.],
        [ 

In [13]:
train_dataset[20900]

1번 [-0.15       -0.30413669 -0.55485612] [0.15       0.29586331 0.44514388]
2번 [-0.151      -0.30513669 -0.55585612] [0.151      0.29686331 0.44614388]
3번 1.002 [0.302 0.602 1.002] [0.7 0.4 0. ]
4번 [-0.501      -0.50513669 -0.55585612] [0.501      0.49686331 0.44614388]
5_0번 [-5.0100000e-01 -4.3837500e-01 -3.7575000e-01 -3.1312500e-01
 -2.5050000e-01 -1.8787500e-01 -1.2525000e-01 -6.2625000e-02
  4.4408921e-14  6.2625000e-02  1.2525000e-01  1.8787500e-01
  2.5050000e-01  3.1312500e-01  3.7575000e-01  4.3837500e-01
  5.0100000e-01]
6_0번 0.062625
7_0번 [array([-5.0100000e-01, -4.3837500e-01, -3.7575000e-01, -3.1312500e-01,
       -2.5050000e-01, -1.8787500e-01, -1.2525000e-01, -6.2625000e-02,
        4.4408921e-14,  6.2625000e-02,  1.2525000e-01,  1.8787500e-01,
        2.5050000e-01,  3.1312500e-01,  3.7575000e-01,  4.3837500e-01,
        5.0100000e-01])]
8_0번 [0.062625]
5_1번 [-0.50513669 -0.44251169 -0.37988669 -0.31726169 -0.25463669 -0.19201169
 -0.12938669 -0.06676169 -0.00413669  0.

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          ...,
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
       

In [14]:
train_dataset[20900][0][0][0]

1번 [-0.15       -0.30413669 -0.55485612] [0.15       0.29586331 0.44514388]
2번 [-0.151      -0.30513669 -0.55585612] [0.151      0.29686331 0.44614388]
3번 1.002 [0.302 0.602 1.002] [0.7 0.4 0. ]
4번 [-0.501      -0.50513669 -0.55585612] [0.501      0.49686331 0.44614388]
5_0번 [-5.0100000e-01 -4.3837500e-01 -3.7575000e-01 -3.1312500e-01
 -2.5050000e-01 -1.8787500e-01 -1.2525000e-01 -6.2625000e-02
  4.4408921e-14  6.2625000e-02  1.2525000e-01  1.8787500e-01
  2.5050000e-01  3.1312500e-01  3.7575000e-01  4.3837500e-01
  5.0100000e-01]
6_0번 0.062625
7_0번 [array([-5.0100000e-01, -4.3837500e-01, -3.7575000e-01, -3.1312500e-01,
       -2.5050000e-01, -1.8787500e-01, -1.2525000e-01, -6.2625000e-02,
        4.4408921e-14,  6.2625000e-02,  1.2525000e-01,  1.8787500e-01,
        2.5050000e-01,  3.1312500e-01,  3.7575000e-01,  4.3837500e-01,
        5.0100000e-01])]
8_0번 [0.062625]
5_1번 [-0.50513669 -0.44251169 -0.37988669 -0.31726169 -0.25463669 -0.19201169
 -0.12938669 -0.06676169 -0.00413669  0.

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., 15., 24., 24., 24., 24., 15.,  0.,  0.,  0.,
          0.,  0.],
        [ 