<a href="https://colab.research.google.com/github/ilia1221/test/blob/main/part3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; later cells read the dataset archive from it
# and write/read model checkpoints under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the dataset archive from the mounted Drive into the working directory and unpack it quietly.
!cp '/content/drive/My Drive/test_task/dataset-v2.zip' dataset-v2.zip
!unzip -q dataset-v2.zip
# The wavefront.py helper module (provides load_obj) is downloaded from GitHub;
# the commented-out line below is the earlier variant that copied it from Drive.
# !cp '/content/drive/My Drive/test_task/wavefront.py' wavefront.py
! wget https://raw.githubusercontent.com/ilia1221/test/main/wavefront.py -O wavefront.py

In [None]:
!ls

In [3]:
import os
import sys
import copy
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR
from torch.utils.data import DataLoader, Dataset
from dataclasses import dataclass
import sklearn.metrics as metrics
from sklearn.metrics import classification_report
from tqdm import tqdm

Класс с параметрами обучения

In [4]:
@dataclass
class Config:
  """Hyperparameters and runtime settings shared by the model, train(), test() and convert_to_jit()."""
  exp_name: str          # free-form experiment label
  batch_size: int        # batch size of the training loader in train() and of the loader in test()
  test_batch_size: int   # batch size of the evaluation loader inside train()
  epochs: int            # number of training epochs; also passed to CosineAnnealingLR as its period
  use_sgd: bool          # True -> SGD (learning rate lr*100, with momentum); False -> Adam (learning rate lr)
  lr: float              # base learning rate
  momentum: float        # momentum passed to SGD
  scheduler: str         # 'cos' -> CosineAnnealingLR, 'step' -> StepLR
  seed: int              # seed passed to torch.manual_seed / torch.cuda.manual_seed in the run cells
  num_points: int        # number of points per cloud (1024 in the configs used in this notebook)
  dropout: float         # dropout probability of the two Dropout layers in DGCNN_cls
  emb_dims: int          # output channels of DGCNN_cls.conv5 (global embedding size before pooling)
  k: int                 # number of nearest neighbours used for the graph features in DGCNN_cls
  device: str            # device string passed to .to(...), e.g. 'cuda' or 'cpu'
  checkpoints_path: str  # directory where 'model_best.pth' and the traced model are stored

Задаем архитектуру сети DGCNN

In [5]:
def knn(x, k):
    """Find the k nearest neighbours of every point in a batch of point sets.

    Args:
        x: tensor laid out as (batch_size, num_dims, num_points).
        k: number of neighbours to return per point (each point is its own
           nearest neighbour, at distance zero).

    Returns:
        Integer tensor of shape (batch_size, num_points, k) with neighbour indices,
        ordered from nearest to farthest.
    """
    # Negated squared Euclidean distance: -(|a|^2 - 2 a.b + |b|^2).
    sq_norms = torch.sum(x**2, dim=1, keepdim=True)
    cross_term = -2*torch.matmul(x.transpose(2, 1), x)
    neg_sq_dist = -sq_norms - cross_term - sq_norms.transpose(2, 1)

    # The largest negated distances are the closest points.
    _, nearest = neg_sq_dist.topk(k=k, dim=-1)
    return nearest


def get_graph_feature(x, k=20, idx=None, dim9=False):
    """Build edge features (neighbour - centre, centre) for every point.

    Args:
        x: tensor viewable as (batch_size, num_dims, num_points).
        k: number of neighbours to search for when ``idx`` is not given.
        idx: optional precomputed neighbour indices of shape
            (batch_size, num_points, n_neighbours). When supplied, the neighbour
            count is taken from ``idx`` itself, so ``k`` does not have to match.
        dim9: when true, the neighbour search uses only channels 6 and onward of ``x``.

    Returns:
        Tensor of shape (batch_size, 2*num_dims, num_points, n_neighbours). The first
        num_dims channels hold neighbour-minus-centre differences, the last num_dims
        channels hold the centre point repeated for every neighbour.
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        knn_input = x[:, 6:] if dim9 else x
        idx = knn(knn_input, k=k)   # (batch_size, num_points, k)

    # Use the real neighbour count so a caller-provided idx cannot disagree with k.
    k = idx.size(-1)
    num_dims = x.size(1)

    # Offset per-sample indices so they address rows of the flattened
    # (batch_size*num_points, num_dims) point matrix.
    idx_base = torch.arange(0, batch_size, device=x.device).view(-1, 1, 1)*num_points
    flat_idx = (idx + idx_base).view(-1)

    points = x.transpose(2, 1).contiguous()   # (batch_size, num_points, num_dims)
    neighbours = points.view(batch_size*num_points, -1)[flat_idx, :]
    neighbours = neighbours.view(batch_size, num_points, k, num_dims)
    # expand() broadcasts the centre point without copying; cat() materialises the result.
    centres = points.view(batch_size, num_points, 1, num_dims).expand(-1, -1, k, -1)

    feature = torch.cat((neighbours - centres, centres), dim=3).permute(0, 3, 1, 2).contiguous()

    return feature      # (batch_size, 2*num_dims, num_points, k)


class DGCNN_cls(nn.Module):
    """DGCNN classification network: four edge-convolution stages, a 1D embedding
    convolution, max+average global pooling and a three-layer fully connected head.

    The forward pass returns raw class scores (no softmax is applied).
    """
    def __init__(self, args, output_channels=40):
        """Create the layers.

        Args:
            args: settings object; ``args.k``, ``args.emb_dims`` and ``args.dropout`` are read here.
            output_channels: number of classes produced by the final linear layer.
                NOTE: the default is 40, while this notebook trains with len(label2num);
                a checkpoint only loads into a model built with the same value.
        """
        super(DGCNN_cls, self).__init__()
        self.args = args
        self.k = args.k
        
        # Each batch-norm layer is assigned as an attribute AND placed inside the
        # matching Sequential below, so it appears under two names in the module tree.
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.bn5 = nn.BatchNorm1d(args.emb_dims)

        # Edge-convolution stages: 1x1 convolutions over (num_points, k) edge features.
        # Input channels are doubled because get_graph_feature concatenates
        # (neighbour - centre, centre).
        self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=False),
                                   self.bn1,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.conv2 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False),
                                   self.bn2,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.conv3 = nn.Sequential(nn.Conv2d(64*2, 128, kernel_size=1, bias=False),
                                   self.bn3,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.conv4 = nn.Sequential(nn.Conv2d(128*2, 256, kernel_size=1, bias=False),
                                   self.bn4,
                                   nn.LeakyReLU(negative_slope=0.2))
        # 512 = 64 + 64 + 128 + 256: the concatenated outputs of the four stages.
        self.conv5 = nn.Sequential(nn.Conv1d(512, args.emb_dims, kernel_size=1, bias=False),
                                   self.bn5,
                                   nn.LeakyReLU(negative_slope=0.2))
        # Classifier head; the input is emb_dims*2 because max- and avg-pooled
        # embeddings are concatenated in forward().
        self.linear1 = nn.Linear(args.emb_dims*2, 512, bias=False)
        self.bn6 = nn.BatchNorm1d(512)
        self.dp1 = nn.Dropout(p=args.dropout)
        self.linear2 = nn.Linear(512, 256)
        self.bn7 = nn.BatchNorm1d(256)
        self.dp2 = nn.Dropout(p=args.dropout)
        self.linear3 = nn.Linear(256, output_channels)

    def forward(self, x):
        """Classify a batch of point clouds.

        Args:
            x: tensor of shape (batch_size, 3, num_points).

        Returns:
            Tensor of shape (batch_size, output_channels) with unnormalised class scores.
        """
        batch_size = x.size(0)
        # Each stage rebuilds the k-NN graph in the current feature space, applies a
        # 1x1 convolution to the edge features and max-pools over the k neighbours.
        x = get_graph_feature(x, k=self.k)      # (batch_size, 3, num_points) -> (batch_size, 3*2, num_points, k)
        x = self.conv1(x)                       # (batch_size, 3*2, num_points, k) -> (batch_size, 64, num_points, k)
        x1 = x.max(dim=-1, keepdim=False)[0]    # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points)

        x = get_graph_feature(x1, k=self.k)     # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k)
        x = self.conv2(x)                       # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k)
        x2 = x.max(dim=-1, keepdim=False)[0]    # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points)

        x = get_graph_feature(x2, k=self.k)     # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k)
        x = self.conv3(x)                       # (batch_size, 64*2, num_points, k) -> (batch_size, 128, num_points, k)
        x3 = x.max(dim=-1, keepdim=False)[0]    # (batch_size, 128, num_points, k) -> (batch_size, 128, num_points)

        x = get_graph_feature(x3, k=self.k)     # (batch_size, 128, num_points) -> (batch_size, 128*2, num_points, k)
        x = self.conv4(x)                       # (batch_size, 128*2, num_points, k) -> (batch_size, 256, num_points, k)
        x4 = x.max(dim=-1, keepdim=False)[0]    # (batch_size, 256, num_points, k) -> (batch_size, 256, num_points)

        # Skip connections: per-point features from all four stages are concatenated.
        x = torch.cat((x1, x2, x3, x4), dim=1)  # (batch_size, 64+64+128+256, num_points)

        x = self.conv5(x)                       # (batch_size, 64+64+128+256, num_points) -> (batch_size, emb_dims, num_points)
        # x1 / x2 are reused here for the global max- and average-pooled embeddings.
        x1 = F.adaptive_max_pool1d(x, 1).view(batch_size, -1)           # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims)
        x2 = F.adaptive_avg_pool1d(x, 1).view(batch_size, -1)           # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims)
        x = torch.cat((x1, x2), 1)              # (batch_size, emb_dims*2)

        x = F.leaky_relu(self.bn6(self.linear1(x)), negative_slope=0.2) # (batch_size, emb_dims*2) -> (batch_size, 512)
        x = self.dp1(x)
        x = F.leaky_relu(self.bn7(self.linear2(x)), negative_slope=0.2) # (batch_size, 512) -> (batch_size, 256)
        x = self.dp2(x)
        x = self.linear3(x)                                             # (batch_size, 256) -> (batch_size, output_channels)
        
        return x

Класс и функции для работы с датасетом


In [6]:
from wavefront import load_obj

def translate_pointcloud(pointcloud):
    """Apply a random per-axis scale and shift to a point cloud.

    The scale is drawn uniformly from [2/3, 3/2) and the shift from [-0.2, 0.2),
    independently for each of the three axes. The input is left untouched.

    Args:
        pointcloud: array of shape (num_points, 3).

    Returns:
        A new float32 array with the same shape as the input.
    """
    scale = np.random.uniform(low=2./3., high=3./2., size=[3])
    shift = np.random.uniform(low=-0.2, high=0.2, size=[3])
    return (pointcloud * scale + shift).astype('float32')


def jitter_pointcloud(pointcloud, sigma=0.01, clip=0.02):
    """Add clipped Gaussian noise to every coordinate of a point cloud.

    The input array is NOT modified: the noise is added to a copy. Mutating the
    argument in place would silently corrupt clouds that a dataset keeps cached
    and hands out again on later epochs.

    Args:
        pointcloud: array of shape (N, C).
        sigma: standard deviation of the noise.
        clip: noise values are clipped to [-clip, clip].

    Returns:
        A new array with the same shape and dtype as the input.
    """
    jittered = np.array(pointcloud, copy=True)
    N, C = jittered.shape
    jittered += np.clip(sigma * np.random.randn(N, C), -1*clip, clip)
    return jittered


def rotate_pointcloud(pointcloud):
    """Rotate a point cloud by a random angle in the (x, z) plane.

    Columns 0 and 2 are rotated by an angle drawn uniformly from [0, 2*pi);
    column 1 is left as is. The input array is NOT modified: the rotation is
    written into a copy, so cached source clouds stay intact.

    Args:
        pointcloud: array of shape (N, 3).

    Returns:
        A new array with the same shape and dtype as the input.
    """
    theta = np.pi*2 * np.random.uniform()
    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],
                                [np.sin(theta), np.cos(theta)]])
    rotated = np.array(pointcloud, copy=True)
    rotated[:, [0, 2]] = rotated[:, [0, 2]].dot(rotation_matrix)  # random rotation (x,z)
    return rotated


# Class name -> integer label. The directory names of the dataset are used as keys,
# and the key order doubles as the row order of the classification report.
label2num = {
    name: index
    for index, name in enumerate(
        ('cone', 'cube', 'cylinder', 'plane', 'torus', 'uv_sphere')
    )
}

# Inverse mapping: integer label -> class name.
num2label = {index: name for name, index in label2num.items()}

def load_data(path, partition):
  """Load every mesh of one partition as a vertex array with its class label.

  Expected layout: <path>/<class name>/<partition>/<mesh file>. Class directories
  and files are visited in sorted order so that sample indices (and therefore
  seeded shuffling) are reproducible across machines; os.listdir alone returns
  entries in arbitrary order. Entries of ``path`` that are not directories
  (e.g. stray files left by an archive tool) are skipped.

  Args:
    path: dataset root directory.
    partition: name of the sub-directory to read inside every class directory.

  Returns:
    (data, labels): a list of float32 vertex arrays and a parallel list of
    integer labels taken from label2num.

  Raises:
    KeyError: if a class directory name is missing from label2num.
  """
  data = []
  labels = []

  for cls in sorted(os.listdir(path)):
    if not os.path.isdir(os.path.join(path, cls)):
      continue
    label = label2num[cls]
    dir_path = os.path.join(path, cls, partition)

    for file_name in sorted(os.listdir(dir_path)):
      wavefront_obj = load_obj(os.path.join(dir_path, file_name))
      data.append(np.asarray(wavefront_obj.vertices, dtype=np.float32))
      labels.append(label)

  return data, labels


class GeometricShapesDataset(Dataset):
  """Point-cloud dataset of geometric shapes that yields fixed-size clouds.

  Every item is a (pointcloud, label) pair where pointcloud has exactly
  ``num_points`` rows: shorter clouds are zero-padded, longer clouds are cut to
  the first ``num_points`` rows. Without the cut, clouds of different sizes
  could not be stacked into one batch by the default DataLoader collation.
  """

  def __init__(self, path, num_points, partition='train'):
    """Load all clouds of ``partition`` from ``path`` into memory.

    Args:
      path: dataset root directory, forwarded to load_data.
      num_points: number of rows every returned cloud will have.
      partition: sub-directory name; 'train' additionally enables augmentation.
    """
    self.num_points = num_points
    self.partition = partition
    self.data, self.label = load_data(path, self.partition)
    print(f'Dataset size: {len(self.data)}')

  def __getitem__(self, idx):
    """Return the ``idx``-th cloud (augmented for 'train') and its integer label."""
    pointcloud = self.data[idx]
    label = self.label[idx]

    if self.partition == 'train':
      # translate_pointcloud returns a new array, so the in-place shuffle below
      # never touches the cached cloud in self.data.
      pointcloud = translate_pointcloud(pointcloud)
      np.random.shuffle(pointcloud)

    available = pointcloud.shape[0]
    if available > self.num_points:
      # For 'train' the rows were just shuffled, so this is a random subset.
      pointcloud = pointcloud[:self.num_points]
    elif available < self.num_points:
      pad_to = self.num_points - available
      pointcloud = np.pad(pointcloud, [(0, pad_to), (0, 0)], mode='constant')

    return pointcloud, label

  def __len__(self):
    """Number of clouds in the partition."""
    return len(self.data)

Train loop

In [13]:
def _seed_numpy_worker(worker_id):
    """Seed NumPy inside a DataLoader worker from the worker's own torch seed.

    The augmentations draw from np.random. When workers are forked they start
    from a copy of the parent's NumPy RNG state, so without re-seeding the
    workers would produce the same "random" augmentations as each other.
    """
    np.random.seed(torch.initial_seed() % 2**32)


def _run_epoch(model, loader, criterion, device, opt=None):
    """Run one pass over ``loader``; optimise when ``opt`` is given, else only evaluate.

    Returns:
        (mean_loss, true_labels, predicted_labels) with the labels as 1-D numpy arrays.

    Raises:
        ValueError: if the loader yields no batches.
    """
    is_train = opt is not None
    model.train(is_train)

    total_loss = 0.0
    count = 0
    true_chunks = []
    pred_chunks = []
    with torch.set_grad_enabled(is_train):
        for data, label in loader:
            # view(-1) keeps a 1-D target even for a batch of one sample, where
            # squeeze() would collapse it to a 0-d tensor and break the loss.
            data, label = data.to(device), label.to(device).view(-1)
            data = data.permute(0, 2, 1)    # (batch, num_points, 3) -> (batch, 3, num_points)
            batch_size = data.size(0)
            if is_train:
                opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            if is_train:
                loss.backward()
                opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            total_loss += loss.item() * batch_size
            true_chunks.append(label.cpu().numpy())
            pred_chunks.append(preds.detach().cpu().numpy())

    if count == 0:
        raise ValueError('data loader produced no batches')
    return total_loss / count, np.concatenate(true_chunks), np.concatenate(pred_chunks)


def train(args):
    """Train DGCNN_cls on the 'train' partition and keep the best checkpoint.

    After every epoch the model is evaluated on the 'test' partition; whenever the
    accuracy is at least as good as the best seen so far, the weights are saved as
    'model_best.pth' inside ``args.checkpoints_path`` (created if missing).

    Args:
        args: Config-like object; reads num_points, batch_size, test_batch_size,
            epochs, use_sgd, lr, momentum, scheduler, device, checkpoints_path
            plus the fields consumed by DGCNN_cls.
    """
    # Fail early (not after the first full epoch) if the directory cannot be created.
    os.makedirs(args.checkpoints_path, exist_ok=True)
    checkpoint_file = os.path.join(args.checkpoints_path, 'model_best.pth')

    train_dataset = GeometricShapesDataset('dataset-v2', args.num_points, partition='train')
    test_dataset = GeometricShapesDataset('dataset-v2', args.num_points, partition='test')

    train_loader = DataLoader(train_dataset,
                              num_workers=8,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True,
                              worker_init_fn=_seed_numpy_worker)

    # Evaluation metrics do not depend on sample order, so no shuffling is needed.
    test_loader = DataLoader(test_dataset,
                             num_workers=8,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             drop_last=False)

    device = args.device
    model = DGCNN_cls(args, output_channels=len(label2num)).to(device)

    if args.use_sgd:
        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    if args.scheduler == 'cos':
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=1e-3)
    elif args.scheduler == 'step':
        scheduler = StepLR(opt, step_size=20, gamma=0.7)

    criterion = torch.nn.CrossEntropyLoss()

    best_test_acc = 0
    for epoch in range(args.epochs):
        outstr = f'Epoch: {epoch} lr: {opt.param_groups[0]["lr"]:.3e}'

        ####################
        # Train
        ####################
        train_loss, train_true, train_pred = _run_epoch(model, train_loader, criterion, device, opt)

        if args.scheduler == 'cos':
            scheduler.step()
        elif args.scheduler == 'step':
            # Step decay with a floor: the learning rate never drops below 1e-5.
            if opt.param_groups[0]['lr'] > 1e-5:
                scheduler.step()
            if opt.param_groups[0]['lr'] < 1e-5:
                for param_group in opt.param_groups:
                    param_group['lr'] = 1e-5

        outstr += '   train_loss: %.6f, train_acc: %.6f, train_avg_acc: %.6f' % (
            train_loss,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))

        ####################
        # Test
        ####################
        test_loss, test_true, test_pred = _run_epoch(model, test_loader, criterion, device)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr += '   test_loss: %.6f, test_acc: %.6f, test_avg_acc: %.6f' % (
            test_loss,
            test_acc,
            avg_per_class_acc)

        print(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(), checkpoint_file)


def test(args):
    """Evaluate the saved best checkpoint on the 'valid' partition and print a report.

    Prints a per-class classification report followed by overall and
    class-balanced accuracy.

    Args:
        args: Config-like object; reads num_points, batch_size, device,
            checkpoints_path plus the fields consumed by DGCNN_cls.
    """
    dataset = GeometricShapesDataset('dataset-v2', args.num_points, partition='valid')
    loader = DataLoader(dataset,
                        num_workers=8,
                        batch_size=args.batch_size,
                        shuffle=False,
                        drop_last=False)

    device = args.device

    # The classifier head must have the same width as during training; with the
    # class default (40 outputs) load_state_dict fails on a shape mismatch.
    model = DGCNN_cls(args, output_channels=len(label2num)).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    state_dict = torch.load(os.path.join(args.checkpoints_path, 'model_best.pth'),
                            map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    test_true = []
    test_pred = []

    with torch.no_grad():
        for data, label in loader:
            # view(-1) keeps labels 1-D even when the last batch holds one sample.
            data, label = data.to(device), label.to(device).view(-1)
            data = data.permute(0, 2, 1)    # (batch, num_points, 3) -> (batch, 3, num_points)
            logits = model(data)
            preds = logits.max(dim=1)[1]
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())

    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    test_acc = metrics.accuracy_score(test_true, test_pred)
    avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)

    # Passing labels explicitly keeps target_names aligned with the class ids even
    # if some class never occurs in the evaluated data.
    report = classification_report(test_true, test_pred,
                                   labels=list(label2num.values()),
                                   target_names=list(label2num.keys()))
    print(report)

    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f'%(test_acc, avg_per_class_acc)
    print(outstr)


def convert_to_jit(args):
    """Trace the best checkpoint with TorchScript and save it next to the checkpoint.

    The traced model is written to 'model_best_<device>.pt' inside
    ``args.checkpoints_path``. The model structure is printed for reference.

    Args:
        args: Config-like object; reads num_points, device, checkpoints_path
            plus the fields consumed by DGCNN_cls.
    """
    model = DGCNN_cls(args, output_channels=len(label2num)).to(args.device)
    # map_location lets a checkpoint saved on GPU be loaded when exporting for CPU
    # on a machine without CUDA.
    state_dict = torch.load(os.path.join(args.checkpoints_path, 'model_best.pth'),
                            map_location=args.device)
    model.load_state_dict(state_dict)
    model.eval()
    print(model)

    # Example input for tracing: one cloud laid out as (1, 3, num_points).
    x = torch.rand(3, args.num_points).unsqueeze(0).to(args.device)
    traced = torch.jit.trace(model, x)

    torch.jit.save(traced, os.path.join(args.checkpoints_path, f'model_best_{args.device}.pt'))




Run train

In [10]:
# Training run configuration (GPU).
config = Config(
  exp_name = 'exp',
  batch_size = 32,
  test_batch_size = 16,
  epochs = 250,
  use_sgd = True,
  lr = 0.001,
  momentum = 0.9,
  scheduler = 'cos',
  seed = 1,
  num_points = 1024,
  dropout = 0.5,
  emb_dims = 1024,
  k = 20,
  device = 'cuda',
  checkpoints_path = '/content/drive/My Drive/test_task_2/checkpoints/',
)

# Seed every RNG the pipeline draws from: the augmentations use np.random,
# while weight initialisation, dropout and shuffling use torch.
np.random.seed(config.seed)
torch.manual_seed(config.seed)
torch.cuda.manual_seed(config.seed)

train(config)

# test(config)

# convert_to_jit(config)



Dataset size: 1200
Dataset size: 600


  cpuset_checked))


Epoch: 0 lr: 1.000e-01   train_loss: 1.099090, train_acc: 0.683277, train_avg_acc: 0.554430   test_loss: 1.540802, test_acc: 0.580000, test_avg_acc: 0.580000
Epoch: 1 lr: 1.000e-01   train_loss: 0.962094, train_acc: 0.721284, train_avg_acc: 0.579952   test_loss: 0.626603, test_acc: 0.723333, test_avg_acc: 0.723333
Epoch: 2 lr: 9.998e-02   train_loss: 0.545507, train_acc: 0.776182, train_avg_acc: 0.640210   test_loss: 0.558361, test_acc: 0.735000, test_avg_acc: 0.735000
Epoch: 3 lr: 9.996e-02   train_loss: 0.451606, train_acc: 0.804054, train_avg_acc: 0.670563   test_loss: 0.583768, test_acc: 0.708333, test_avg_acc: 0.708333
Epoch: 4 lr: 9.994e-02   train_loss: 0.413898, train_acc: 0.810811, train_avg_acc: 0.670175   test_loss: 0.567091, test_acc: 0.698333, test_avg_acc: 0.698333
Epoch: 5 lr: 9.990e-02   train_loss: 0.396840, train_acc: 0.826858, train_avg_acc: 0.701873   test_loss: 0.531916, test_acc: 0.720000, test_avg_acc: 0.720000
Epoch: 6 lr: 9.986e-02   train_loss: 0.417963, train

KeyboardInterrupt: ignored

Заключение.
Модель склонна к переобучению, причём, возможно, на количество точек (у каждой фигуры оно своё).
Как минимум нужен датасет, где количество точек в облаке не связано с формой этого облака.
Можно попробовать добавить аугментации (jitter, translation, rotation; rotation, впрочем, вроде бы и так присутствует в датасете).

Были мысли свести задачу к двумерной, например как описано тут: https://arxiv.org/abs/2106.05304, но хотелось попробовать архитектуру DGCNN.

Требование про единый стиль кода видел, но, так как часть кода взята с GitHub, а работа велась в Colab, было проблематично привести его в нормальный вид и разбить на модули.
Обычно я не использую тетрадки для тренировки моделей, а запускаю тренировку в screen. На модули разбил бы так же, как здесь разбито на ячейки, т.е. отдельно модуль с архитектурой модели, отдельно с train loop, отдельно с dataloader. Не стал делать это разбиение тут, поскольку было бы сильно неудобно вносить правки в код. Ещё в реальном кейсе labels надо сохранять в модель при конвертировании в jit. Сам train loop тоже стоит разбить на функции или вообще перенести на Catalyst.


Convert to jit

In [14]:
# Export configuration: same hyperparameters as the training cell, but with
# device = 'cpu', so the traced model is saved as 'model_best_cpu.pt'.
config = Config(
  exp_name = 'exp',
  batch_size = 32,
  test_batch_size = 16,
  epochs = 250,
  use_sgd = True,
  lr = 0.001,
  momentum = 0.9,
  scheduler = 'cos',
  seed = 1,     
  num_points = 1024,
  dropout = 0.5,
  emb_dims = 1024,
  k = 20,
  device = 'cpu',
  checkpoints_path = '/content/drive/My Drive/test_task_2/checkpoints/',
) 

# Seeds are fixed before the call; convert_to_jit draws its example input with torch.rand.
torch.manual_seed(config.seed)    
torch.cuda.manual_seed(config.seed)    

# Only the export step is enabled in this cell; training and evaluation are left commented out.
# train(config)    

# test(config)

convert_to_jit(config)

DGCNN_cls(
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn5): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1): Sequential(
    (0): Conv2d(6, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv2): Sequential(
    (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias