# Project: MVCNN for Classification and Reconstruction from Multi-views 3D

### Imports

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import matplotlib as plt
import k3d
import trimesh
import torch
import skimage
import torchvision

In [2]:
# Environment sanity check: report the installed Pillow version and query CUDA availability.
import torch
import PIL
print('Pillow Version:', PIL.__version__)
# Last expression of the cell, so its boolean result is shown as the cell output.
torch.cuda.is_available()

Pillow Version: 7.2.0


True

## 0. Starting up (1 time)

### (a) Downloading the data

In [None]:
print('Downloading ...')
# File sizes: 11GB for ShapeNetRendering.tgz (Multiview images), 22MB for ShapeNetVox32.tgz (target voxels)
!mkdir -p datasets
!wget http://cvgl.stanford.edu/data2/ShapeNetRendering.tgz -P datasets
!wget http://cvgl.stanford.edu/data2/ShapeNetVox32.tgz -P datasets
print('Extracting ...')
!tar -xvf datasets/ShapeNetRendering.tgz -d datasets
!tar -xvf datasets/ShapeNetVox32.tgz -d datasets
!rm datasets/ShapeNetRendering.tgz
!rm datasets/ShapeNetVox32.tgz
print('Done.')

### (b) Dataset

In [8]:
from mvcnn_rec.data.shapenet import ShapeNetMultiview

# The train and val splits are built with identical view-sampling settings,
# so the shared keyword arguments are spelled out once.
dataset_kwargs = dict(
    total_views=24,
    num_views=12,
    load_mode='mvcnn_rec',  # Change to mvcnn to get only images and labels
    random_start_view=False,
)
train_dataset = ShapeNetMultiview('train', **dataset_kwargs)
val_dataset = ShapeNetMultiview('val', **dataset_kwargs)

# Report the split sizes so they can be compared against the expected counts.
print(f'Length of train set: {len(train_dataset)}') # Expected: 30642
print(f'Length of val set: {len(val_dataset)}') # Expected: 4371

Length of train set: 30642
Length of val set: 4371


In [9]:
# Fetch the first training sample and print the shape of its "item" entry (the stacked view images).
train_sample = train_dataset[0] 
print(train_sample["item"].shape) # Expected torch.Size([12, 3, 224, 224])

torch.Size([12, 3, 224, 224])


In [10]:
# Visualize some shapes
from mvcnn_rec.util.visualization import visualize_occupancy

print(f'Name: {train_sample["name"]}')
print(f'Voxel Dimensions: {train_sample["voxel"].shape}')
print(f'Label: {train_sample["label"]}')

visualize_occupancy(train_sample["voxel"].squeeze(), flip_axes=True)

Name: 02691156/70e4200e848e653072ec6e905035e5d7
Voxel Dimensions: (1, 32, 32, 32)
Label: 0


Output()

### (c) Data loader 

In [58]:
# Datasets return data one sample at a time; the DataLoader aggregates those samples into batches.
loader_options = dict(
    batch_size=4,     # number of samples collated into each batch
    shuffle=True,     # reshuffle sample order so the network cannot depend on input ordering
    num_workers=4,    # number of parallel worker processes loading data
    pin_memory=True,  # implementation detail that speeds up uploading data to the GPU
)
train_dataloader = torch.utils.data.DataLoader(train_dataset, **loader_options)

In [59]:
# Take only the first batch (index 0) from the loader to inspect what a batch looks like.
i, batch = next(enumerate(train_dataloader))
input_data = batch['item']
target_labels = batch['label']

In [60]:
# Display the shapes of the batched "item" and "label" tensors as the cell output.
input_data.shape, target_labels.shape # Expected (torch.Size([4, 12, 3, 224, 224]), torch.Size([4]))

(torch.Size([4, 12, 3, 224, 224]), torch.Size([4]))

In [61]:
# The loader yields batches laid out as (batch, views, C, H, W) - see the shape shown above -
# while the model is fed views-first tensors, i.e. (views, batch, C, H, W).
# `Tensor.view` only reinterprets the same memory with a new shape, which would mix views of
# different shapes together; swapping two axes requires `transpose` (or `permute`).
# NOTE: this cell rebinds `input_data`; re-run the batch-loading cell before re-running it,
# otherwise the axes are swapped back.
B, N, C, H, W = input_data.size()
input_data = input_data.transpose(0, 1).contiguous()
assert input_data.shape == (N, B, C, H, W)
input_data.shape # Expected torch.Size([12, 4, 3, 224, 224])

torch.Size([12, 4, 3, 224, 224])

## 1. Multiview CNN for Classification

### (a) Model

In [12]:
# Build the multi-view classifier and print a per-layer parameter summary.
from mvcnn_rec.model.mvcnn import MVCNN
from mvcnn_rec.util.model import summarize_model

# NOTE(review): 13 is presumably the number of object classes (the forward-pass check expects 13 logits) - confirm.
mvcnn = MVCNN(13)
print(summarize_model(mvcnn))  # Expected: Rows 0-68 and TOTAL = 11183181

   | Name                           | Type              | Params  
------------------------------------------------------------------------
0  | encoder_image                  | Sequential        | 11176512
1  | encoder_image.0                | Conv2d            | 9408    
2  | encoder_image.1                | BatchNorm2d       | 128     
3  | encoder_image.2                | ReLU              | 0       
4  | encoder_image.3                | MaxPool2d         | 0       
5  | encoder_image.4                | Sequential        | 147968  
6  | encoder_image.4.0              | BasicBlock        | 73984   
7  | encoder_image.4.0.conv1        | Conv2d            | 36864   
8  | encoder_image.4.0.bn1          | BatchNorm2d       | 128     
9  | encoder_image.4.0.relu         | ReLU              | 0       
10 | encoder_image.4.0.conv2        | Conv2d            | 36864   
11 | encoder_image.4.0.bn2          | BatchNorm2d       | 128     
12 | encoder_image.4.1              | BasicBlock        

In [14]:
# Smoke test: push a random views-first batch through the classifier and check the output shape.
multi_images = torch.randn(12, 8, 3, 224, 224) * 2. - 1. # Suppose 12 images per shape, 8 shapes
pred_classes = mvcnn(multi_images)
print('Output tensor shape: ', pred_classes.shape)  # Expected: torch.Size([8, 13])

Output tensor shape:  torch.Size([8, 13])


### (b) Training script and overfit one shape

In [8]:
from mvcnn_rec.training import train_mvcnn

# Sanity check of the training script: overfit with batch size 1 for 50 epochs.
config = dict(
    experiment_name='mvcnn_overfitting',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=True,
    batch_size=1,
    resume_ckpt=None,
    learning_rate=0.001,
    max_epochs=50,
    print_every_n=5,
    validate_every_n=10,
    num_views=8,
)
# should be able to get ~0.0 train_loss and ~0.0 val_loss and 100% accuracy
train_mvcnn.main(config)

Using device: cuda:0
[004/00000] train_loss: 1.056
[009/00000] train_loss: 0.012
[009/00000] val_loss: 0.000, val_accuracy: 100.000%
[014/00000] train_loss: 0.001
[019/00000] train_loss: 0.000
[019/00000] val_loss: 0.000, val_accuracy: 100.000%
[024/00000] train_loss: 0.000
[029/00000] train_loss: 0.000
[029/00000] val_loss: 0.000, val_accuracy: 100.000%
[034/00000] train_loss: 0.000
[039/00000] train_loss: 0.000
[039/00000] val_loss: 0.000, val_accuracy: 100.000%
[044/00000] train_loss: 0.000
[049/00000] train_loss: 0.000
[049/00000] val_loss: 0.000, val_accuracy: 100.000%


### (b) Finetune on 1 image view

#### Freeze the backbone. Finetune only the FC layer.

In [11]:
# Single-view run with the backbone frozen (see the 'freezee_backbone' flag below).
config = dict(
    experiment_name='1vcnn_generalize',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=64,
    resume_ckpt=None,
    learning_rate=0.001,
    max_epochs=30,
    print_every_n=50,
    validate_every_n=100,
    num_views=1,  # Num views from total 24 views (stride = 24 / num_views)
    random_start_view=True,  # Set to False to get views start from idx 0. Otherwise random from [0, stride)
    freezee_backbone=True,  # key spelling kept exactly as the notebook's configs use it
)

In [13]:
# Launch training with the `config` dict defined in the previous cell.
from mvcnn_rec.training import train_mvcnn
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 1.720
[000/00099] train_loss: 1.152
[000/00099] val_loss: 1.008, val_accuracy: 71.517%
[000/00149] train_loss: 0.960
[000/00199] train_loss: 0.855
[000/00199] val_loss: 0.803, val_accuracy: 75.635%
[000/00249] train_loss: 0.800
[000/00299] train_loss: 0.750
[000/00299] val_loss: 0.709, val_accuracy: 78.655%
[000/00349] train_loss: 0.708
[000/00399] train_loss: 0.703
[000/00399] val_loss: 0.684, val_accuracy: 78.540%
[000/00449] train_loss: 0.679
[001/00020] train_loss: 0.684
[001/00020] val_loss: 0.641, val_accuracy: 80.096%
[001/00070] train_loss: 0.644
[001/00120] train_loss: 0.630
[001/00120] val_loss: 0.625, val_accuracy: 80.622%
[001/00170] train_loss: 0.642
[001/00220] train_loss: 0.632
[001/00220] val_loss: 0.609, val_accuracy: 80.965%
[001/00270] train_loss: 0.596
[001/00320] train_loss: 0.612
[001/00320] val_loss: 0.601, val_accuracy: 81.720%
[001/00370] train_loss: 0.613
[001/00420] train_loss: 0.615
[001/00420] val_loss: 0.603, va

[015/00264] train_loss: 0.499
[015/00314] train_loss: 0.527
[015/00314] val_loss: 0.498, val_accuracy: 84.031%
[015/00364] train_loss: 0.526
[015/00414] train_loss: 0.468
[015/00414] val_loss: 0.490, val_accuracy: 84.146%
[015/00464] train_loss: 0.505
[016/00035] train_loss: 0.494
[016/00035] val_loss: 0.505, val_accuracy: 84.008%
[016/00085] train_loss: 0.516
[016/00135] train_loss: 0.513
[016/00135] val_loss: 0.497, val_accuracy: 83.734%
[016/00185] train_loss: 0.510
[016/00235] train_loss: 0.474
[016/00235] val_loss: 0.487, val_accuracy: 84.512%
[016/00285] train_loss: 0.500
[016/00335] train_loss: 0.503
[016/00335] val_loss: 0.491, val_accuracy: 84.283%
[016/00385] train_loss: 0.492
[016/00435] train_loss: 0.507
[016/00435] val_loss: 0.500, val_accuracy: 84.031%
[017/00006] train_loss: 0.495
[017/00056] train_loss: 0.506
[017/00056] val_loss: 0.486, val_accuracy: 84.237%
[017/00106] train_loss: 0.520
[017/00156] train_loss: 0.505
[017/00156] val_loss: 0.492, val_accuracy: 84.214%
[

#### Finetune results epoch 30
- [029/00408] val_loss: 0.497, val_accuracy: 84.306%
- [029/00458] train_loss: 0.465

In [14]:
from mvcnn_rec.training import train_mvcnn

# 30 more epochs of the frozen-backbone run, resuming from its best checkpoint.
config = dict(
    experiment_name='1vcnn_generalize',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=64,
    resume_ckpt='mvcnn_rec/runs/1vcnn_generalize/model_best.ckpt',
    learning_rate=0.001,
    max_epochs=30,
    print_every_n=50,
    validate_every_n=100,
    num_views=1,
    random_start_view=True,
    freezee_backbone=True,
)
train_mvcnn.main(config)

[autoreload of mvcnn_rec.training.train_mvcnn failed: Traceback (most recent call last):
  File "/rhome/cuonghn/.cache/pypoetry/virtualenvs/machine_learning_for_3d_geometry-sN7A7CF0-py3.8/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/rhome/cuonghn/.cache/pypoetry/virtualenvs/machine_learning_for_3d_geometry-sN7A7CF0-py3.8/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/lib/python3.8/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/usr/lib/python3.8/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 848, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/cluster/51/cuonghn/MVCNN_Reconstruction/mvcnn_rec/t

Using device: cuda:0
[000/00049] train_loss: 0.495
[000/00099] train_loss: 0.468
[000/00099] val_loss: 0.488, val_accuracy: 84.306%
[000/00149] train_loss: 0.469
[000/00199] train_loss: 0.481
[000/00199] val_loss: 0.493, val_accuracy: 83.848%
[000/00249] train_loss: 0.483
[000/00299] train_loss: 0.489
[000/00299] val_loss: 0.490, val_accuracy: 84.374%
[000/00349] train_loss: 0.499
[000/00399] train_loss: 0.504
[000/00399] val_loss: 0.491, val_accuracy: 84.306%
[000/00449] train_loss: 0.489
[001/00020] train_loss: 0.488
[001/00020] val_loss: 0.483, val_accuracy: 85.015%
[001/00070] train_loss: 0.475
[001/00120] train_loss: 0.478
[001/00120] val_loss: 0.473, val_accuracy: 84.626%
[001/00170] train_loss: 0.509
[001/00220] train_loss: 0.465
[001/00220] val_loss: 0.494, val_accuracy: 83.894%
[001/00270] train_loss: 0.528
[001/00320] train_loss: 0.467
[001/00320] val_loss: 0.481, val_accuracy: 84.397%
[001/00370] train_loss: 0.508
[001/00420] train_loss: 0.486
[001/00420] val_loss: 0.478, va

[015/00264] train_loss: 0.501
[015/00314] train_loss: 0.497
[015/00314] val_loss: 0.499, val_accuracy: 84.054%
[015/00364] train_loss: 0.527
[015/00414] train_loss: 0.494
[015/00414] val_loss: 0.489, val_accuracy: 84.306%
[015/00464] train_loss: 0.506
[016/00035] train_loss: 0.478
[016/00035] val_loss: 0.490, val_accuracy: 84.329%
[016/00085] train_loss: 0.500
[016/00135] train_loss: 0.492
[016/00135] val_loss: 0.490, val_accuracy: 84.260%
[016/00185] train_loss: 0.496
[016/00235] train_loss: 0.470
[016/00235] val_loss: 0.485, val_accuracy: 84.534%
[016/00285] train_loss: 0.499
[016/00335] train_loss: 0.468
[016/00335] val_loss: 0.496, val_accuracy: 83.757%
[016/00385] train_loss: 0.511
[016/00435] train_loss: 0.498
[016/00435] val_loss: 0.476, val_accuracy: 85.198%
[017/00006] train_loss: 0.471
[017/00056] train_loss: 0.512
[017/00056] val_loss: 0.491, val_accuracy: 83.894%
[017/00106] train_loss: 0.469
[017/00156] train_loss: 0.470
[017/00156] val_loss: 0.480, val_accuracy: 84.329%
[

#### Performance does not improve.

#### Finetune backbone resnet

In [15]:
from mvcnn_rec.training import train_mvcnn

# Unfreeze the backbone and finetune for 30 epochs at a small learning rate,
# starting from the best frozen-backbone checkpoint.
config = dict(
    experiment_name='1vcnn_generalize_finetune_backbone',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=64,
    resume_ckpt='mvcnn_rec/runs/1vcnn_generalize/model_best.ckpt',
    learning_rate=0.0001,
    max_epochs=30,
    print_every_n=50,
    validate_every_n=100,
    num_views=1,
    random_start_view=True,
    freezee_backbone=False,
)
train_mvcnn.main(config)

Using device: cuda:0


[autoreload of mvcnn_rec.training.train_mvcnn failed: Traceback (most recent call last):
  File "/rhome/cuonghn/.cache/pypoetry/virtualenvs/machine_learning_for_3d_geometry-sN7A7CF0-py3.8/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/rhome/cuonghn/.cache/pypoetry/virtualenvs/machine_learning_for_3d_geometry-sN7A7CF0-py3.8/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/lib/python3.8/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/usr/lib/python3.8/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 848, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/cluster/51/cuonghn/MVCNN_Reconstruction/mvcnn_rec/t

[000/00049] train_loss: 0.475
[000/00099] train_loss: 0.446
[000/00099] val_loss: 0.470, val_accuracy: 85.427%
[000/00149] train_loss: 0.490
[000/00199] train_loss: 0.448
[000/00199] val_loss: 0.469, val_accuracy: 84.992%
[000/00249] train_loss: 0.443
[000/00299] train_loss: 0.458
[000/00299] val_loss: 0.475, val_accuracy: 84.809%
[000/00349] train_loss: 0.477
[000/00399] train_loss: 0.452
[000/00399] val_loss: 0.470, val_accuracy: 85.152%
[000/00449] train_loss: 0.483
[001/00020] train_loss: 0.472
[001/00020] val_loss: 0.467, val_accuracy: 85.289%
[001/00070] train_loss: 0.498
[001/00120] train_loss: 0.446
[001/00120] val_loss: 0.474, val_accuracy: 84.878%
[001/00170] train_loss: 0.468
[001/00220] train_loss: 0.514
[001/00220] val_loss: 0.468, val_accuracy: 85.106%
[001/00270] train_loss: 0.468
[001/00320] train_loss: 0.456
[001/00320] val_loss: 0.473, val_accuracy: 84.900%
[001/00370] train_loss: 0.451
[001/00420] train_loss: 0.469
[001/00420] val_loss: 0.467, val_accuracy: 85.289%
[

[015/00264] train_loss: 0.446
[015/00314] train_loss: 0.451
[015/00314] val_loss: 0.469, val_accuracy: 84.923%
[015/00364] train_loss: 0.489
[015/00414] train_loss: 0.477
[015/00414] val_loss: 0.465, val_accuracy: 85.198%
[015/00464] train_loss: 0.450
[016/00035] train_loss: 0.468
[016/00035] val_loss: 0.463, val_accuracy: 84.992%
[016/00085] train_loss: 0.514
[016/00135] train_loss: 0.458
[016/00135] val_loss: 0.467, val_accuracy: 85.038%
[016/00185] train_loss: 0.471
[016/00235] train_loss: 0.440
[016/00235] val_loss: 0.464, val_accuracy: 85.289%
[016/00285] train_loss: 0.463
[016/00335] train_loss: 0.446
[016/00335] val_loss: 0.462, val_accuracy: 85.152%
[016/00385] train_loss: 0.481
[016/00435] train_loss: 0.449
[016/00435] val_loss: 0.468, val_accuracy: 85.221%
[017/00006] train_loss: 0.423
[017/00056] train_loss: 0.475
[017/00056] val_loss: 0.463, val_accuracy: 85.152%
[017/00106] train_loss: 0.482
[017/00156] train_loss: 0.443
[017/00156] val_loss: 0.467, val_accuracy: 85.084%
[

In [16]:
from mvcnn_rec.training import train_mvcnn

# 30 more epochs of backbone finetuning at a higher learning rate,
# resuming from the best finetuned-backbone checkpoint.
config = dict(
    experiment_name='1vcnn_generalize_finetune_backbone',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=64,
    resume_ckpt='mvcnn_rec/runs/1vcnn_generalize_finetune_backbone/model_best.ckpt',
    learning_rate=0.001,
    max_epochs=30,
    print_every_n=50,
    validate_every_n=100,
    num_views=1,
    random_start_view=True,
    freezee_backbone=False,
)
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 1.078
[000/00099] train_loss: 0.579
[000/00099] 2022-01-08 11:23:16.267280 val_loss: 0.620, val_accuracy: 80.737%
[000/00149] train_loss: 0.539
[000/00199] train_loss: 0.506
[000/00199] 2022-01-08 11:23:26.176667 val_loss: 0.575, val_accuracy: 80.485%
[000/00249] train_loss: 0.542
[000/00299] train_loss: 0.454
[000/00299] 2022-01-08 11:23:35.716679 val_loss: 0.432, val_accuracy: 86.342%
[000/00349] train_loss: 0.438
[000/00399] train_loss: 0.521
[000/00399] 2022-01-08 11:23:45.420904 val_loss: 0.671, val_accuracy: 79.181%
[000/00449] train_loss: 0.468
[001/00020] train_loss: 0.463
[001/00020] 2022-01-08 11:23:55.411970 val_loss: 0.862, val_accuracy: 73.530%
[001/00070] train_loss: 0.481
[001/00120] train_loss: 0.453
[001/00120] 2022-01-08 11:24:05.114953 val_loss: 0.482, val_accuracy: 83.917%
[001/00170] train_loss: 0.507
[001/00220] train_loss: 0.455
[001/00220] 2022-01-08 11:24:14.769860 val_loss: 0.809, val_accuracy: 73.576%
[001/00270] t

[015/00064] train_loss: 0.344
[015/00114] train_loss: 0.351
[015/00114] 2022-01-08 11:34:50.695683 val_loss: 0.349, val_accuracy: 89.179%
[015/00164] train_loss: 0.327
[015/00214] train_loss: 0.338
[015/00214] 2022-01-08 11:34:59.717901 val_loss: 0.633, val_accuracy: 79.890%
[015/00264] train_loss: 0.358
[015/00314] train_loss: 0.355
[015/00314] 2022-01-08 11:35:08.468677 val_loss: 1.879, val_accuracy: 54.816%
[015/00364] train_loss: 0.347
[015/00414] train_loss: 0.336
[015/00414] 2022-01-08 11:35:17.291542 val_loss: 0.414, val_accuracy: 87.257%
[015/00464] train_loss: 0.336
[016/00035] train_loss: 0.361
[016/00035] 2022-01-08 11:35:26.256488 val_loss: 0.408, val_accuracy: 87.303%
[016/00085] train_loss: 0.344
[016/00135] train_loss: 0.357
[016/00135] 2022-01-08 11:35:35.271672 val_loss: 0.415, val_accuracy: 87.257%
[016/00185] train_loss: 0.343
[016/00235] train_loss: 0.338
[016/00235] 2022-01-08 11:35:45.048580 val_loss: 0.375, val_accuracy: 88.378%
[016/00285] train_loss: 0.337
[016

[027/00266] 2022-01-08 11:44:13.175522 val_loss: 0.341, val_accuracy: 88.698%
[027/00316] train_loss: 0.323
[027/00366] train_loss: 0.333
[027/00366] 2022-01-08 11:44:22.774620 val_loss: 0.326, val_accuracy: 89.430%
[027/00416] train_loss: 0.339
[027/00466] train_loss: 0.291
[027/00466] 2022-01-08 11:44:32.422005 val_loss: 0.333, val_accuracy: 89.339%
[028/00037] train_loss: 0.296
[028/00087] train_loss: 0.325
[028/00087] 2022-01-08 11:44:42.307324 val_loss: 0.389, val_accuracy: 87.737%
[028/00137] train_loss: 0.312
[028/00187] train_loss: 0.346
[028/00187] 2022-01-08 11:44:51.877571 val_loss: 0.422, val_accuracy: 86.822%
[028/00237] train_loss: 0.317
[028/00287] train_loss: 0.336
[028/00287] 2022-01-08 11:45:01.610764 val_loss: 0.364, val_accuracy: 88.515%
[028/00337] train_loss: 0.329
[028/00387] train_loss: 0.325
[028/00387] 2022-01-08 11:45:11.267508 val_loss: 0.372, val_accuracy: 88.195%
[028/00437] train_loss: 0.324
[029/00008] train_loss: 0.326
[029/00008] 2022-01-08 11:45:21.43

#### Finetuning the backbone gives better results: accuracy rises to about 90% and val loss drops to 0.314

#### Test finetune backbone from original pretrained resnet

In [57]:
from mvcnn_rec.training import train_mvcnn

# Finetune the backbone without resuming from any earlier run (resume_ckpt is None).
config = dict(
    experiment_name='1vcnn_generalize_finetune_backbone_from_resnet',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=64,
    resume_ckpt=None,
    learning_rate=0.001,
    max_epochs=30,
    print_every_n=50,
    validate_every_n=100,
    num_views=1,
    random_start_view=True,
    freezee_backbone=False,
)
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 0.876
[000/00099] train_loss: 0.640
Num batch val 68
[000/00099] 2022-01-08 15:35:10.162568 val_loss: 1.002, val_accuracy: 68.543%
[000/00149] train_loss: 0.549
[000/00199] train_loss: 0.541
Num batch val 68
[000/00199] 2022-01-08 15:35:18.115786 val_loss: 0.748, val_accuracy: 77.236%
[000/00249] train_loss: 0.555
[000/00299] train_loss: 0.572
Num batch val 68
[000/00299] 2022-01-08 15:35:26.191692 val_loss: 0.797, val_accuracy: 76.253%
[000/00349] train_loss: 0.520
[000/00399] train_loss: 0.515
Num batch val 68
[000/00399] 2022-01-08 15:35:34.096337 val_loss: 0.715, val_accuracy: 79.021%
[000/00449] train_loss: 0.567
[001/00020] train_loss: 0.482
Num batch val 68
[001/00020] 2022-01-08 15:35:42.697681 val_loss: 0.785, val_accuracy: 75.589%
[001/00070] train_loss: 0.484
[001/00120] train_loss: 0.451
Num batch val 68
[001/00120] 2022-01-08 15:35:50.546646 val_loss: 0.668, val_accuracy: 79.295%
[001/00170] train_loss: 0.498
[001/00220] train_l

[011/00080] train_loss: 0.345
[011/00130] train_loss: 0.355
Num batch val 68
[011/00130] 2022-01-08 15:42:14.596163 val_loss: 0.406, val_accuracy: 86.593%
[011/00180] train_loss: 0.381
[011/00230] train_loss: 0.359
Num batch val 68
[011/00230] 2022-01-08 15:42:22.298313 val_loss: 1.004, val_accuracy: 69.778%
[011/00280] train_loss: 0.350
[011/00330] train_loss: 0.370
Num batch val 68
[011/00330] 2022-01-08 15:42:30.093775 val_loss: 0.415, val_accuracy: 86.616%
[011/00380] train_loss: 0.357
[011/00430] train_loss: 0.393
Num batch val 68
[011/00430] 2022-01-08 15:42:37.816203 val_loss: 0.362, val_accuracy: 88.813%
[012/00001] train_loss: 0.351
[012/00051] train_loss: 0.351
Num batch val 68
[012/00051] 2022-01-08 15:42:46.026650 val_loss: 0.440, val_accuracy: 85.541%
[012/00101] train_loss: 0.357
[012/00151] train_loss: 0.357
Num batch val 68
[012/00151] 2022-01-08 15:42:53.867142 val_loss: 0.516, val_accuracy: 84.123%
[012/00201] train_loss: 0.337
[012/00251] train_loss: 0.364
Num batch 

[022/00111] train_loss: 0.345
[022/00161] train_loss: 0.331
Num batch val 68
[022/00161] 2022-01-08 15:49:15.332624 val_loss: 0.356, val_accuracy: 88.858%
[022/00211] train_loss: 0.330
[022/00261] train_loss: 0.361
Num batch val 68
[022/00261] 2022-01-08 15:49:23.169175 val_loss: 0.576, val_accuracy: 81.560%
[022/00311] train_loss: 0.341
[022/00361] train_loss: 0.334
Num batch val 68
[022/00361] 2022-01-08 15:49:30.842597 val_loss: 0.355, val_accuracy: 88.698%
[022/00411] train_loss: 0.351
[022/00461] train_loss: 0.327
Num batch val 68
[022/00461] 2022-01-08 15:49:38.617373 val_loss: 0.514, val_accuracy: 83.574%
[023/00032] train_loss: 0.335
[023/00082] train_loss: 0.307
Num batch val 68
[023/00082] 2022-01-08 15:49:46.863200 val_loss: 0.671, val_accuracy: 78.289%
[023/00132] train_loss: 0.316
[023/00182] train_loss: 0.346
Num batch val 68
[023/00182] 2022-01-08 15:49:54.820217 val_loss: 0.407, val_accuracy: 87.303%
[023/00232] train_loss: 0.333
[023/00282] train_loss: 0.322
Num batch 

#### After 30 epochs: val_acc ~89.7% => Finetuning the ResNet backbone gives better model performance

### (c) Training over the entire training set
- First, use the backbone from the finetuned single-view MVCNN
- Then finetune the backbone

#### Train MVCNN with freezing pretrained resnet18 

In [48]:
from mvcnn_rec.training import train_mvcnn

# 8-view MVCNN with the backbone frozen, trained without a resume checkpoint.
config = dict(
    experiment_name='mvcnn_generalize_8views_freezeebackbone',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=16,
    resume_ckpt=None,
    learning_rate=0.001,
    max_epochs=10,
    print_every_n=50,
    validate_every_n=100,
    num_views=8,  # Num views from total 24 views (stride = 24 / num_views)
    random_start_view=True,  # Set to False to get views start from idx 0
    freezee_backbone=True,  # Set to False to finetune the backbone
)
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 2.602
[000/00099] train_loss: 2.468
Num batch val 273
[000/00099] 2022-01-08 15:20:51.280152 val_loss: 2.752, val_accuracy: 18.188%
[000/00149] train_loss: 2.584
[000/00199] train_loss: 2.545
Num batch val 273
[000/00199] 2022-01-08 15:21:12.398132 val_loss: 2.676, val_accuracy: 18.188%
[000/00249] train_loss: 2.622
[000/00299] train_loss: 2.580
Num batch val 273
[000/00299] 2022-01-08 15:21:33.352946 val_loss: 2.596, val_accuracy: 22.695%
[000/00349] train_loss: 2.490
[000/00399] train_loss: 2.623
Num batch val 273
[000/00399] 2022-01-08 15:21:54.513720 val_loss: 2.395, val_accuracy: 21.666%
[000/00449] train_loss: 2.482
[000/00499] train_loss: 2.577
Num batch val 273
[000/00499] 2022-01-08 15:22:15.410685 val_loss: 2.552, val_accuracy: 17.502%
[000/00549] train_loss: 2.435
[000/00599] train_loss: 2.484
Num batch val 273
[000/00599] 2022-01-08 15:22:36.563262 val_loss: 2.473, val_accuracy: 26.104%
[000/00649] train_loss: 2.545
[000/00699] t

KeyboardInterrupt: 

#### Train MVCNN and finetune resnet18 at the same time

In [62]:
from mvcnn_rec.training import train_mvcnn

# 8-view MVCNN with the backbone left trainable, trained without a resume checkpoint.
config = dict(
    experiment_name='mvcnn_generalize_8views_finetune-resnet',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=16,
    resume_ckpt=None,
    learning_rate=0.001,
    max_epochs=10,
    print_every_n=50,
    validate_every_n=100,
    num_views=8,  # Num views from total 24 views (stride = 24 / num_views)
    random_start_view=True,  # Set to False to get views start from idx 0
    freezee_backbone=False,  # Set to False to finetune the backbone
)
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 2.474
[000/00099] train_loss: 2.383
Num batch val 273
[000/00099] 2022-01-08 16:11:16.944299 val_loss: 2.403, val_accuracy: 19.355%
[000/00149] train_loss: 2.367
[000/00199] train_loss: 2.313
Num batch val 273
[000/00199] 2022-01-08 16:11:44.275021 val_loss: 2.361, val_accuracy: 15.488%
[000/00249] train_loss: 2.386
[000/00299] train_loss: 2.394
Num batch val 273
[000/00299] 2022-01-08 16:12:11.746318 val_loss: 2.462, val_accuracy: 17.136%
[000/00349] train_loss: 2.393
[000/00399] train_loss: 2.343
Num batch val 273
[000/00399] 2022-01-08 16:12:39.321615 val_loss: 2.360, val_accuracy: 15.488%
[000/00449] train_loss: 2.349
[000/00499] train_loss: 2.406
Num batch val 273
[000/00499] 2022-01-08 16:13:06.617697 val_loss: 2.369, val_accuracy: 17.136%
[000/00549] train_loss: 2.351
[000/00599] train_loss: 2.297
Num batch val 273
[000/00599] 2022-01-08 16:13:33.965425 val_loss: 2.356, val_accuracy: 19.446%
[000/00649] train_loss: 2.370
[000/00699] t

KeyboardInterrupt: 

#### Train MVCNN with freezing finetuned (1VCNN) resnet18 

In [None]:
from mvcnn_rec.training import train_mvcnn

# 8-view MVCNN initialised from the finetuned single-view checkpoint, backbone frozen, one epoch.
config = dict(
    experiment_name='mvcnn_generalize_8views_from_1view',
    device='cuda:0',  # change this to cpu if you do not have a GPU
    is_overfit=False,
    batch_size=16,
    resume_ckpt='mvcnn_rec/runs/1vcnn_generalize_finetune_backbone/model_best.ckpt',
    learning_rate=0.0002,
    max_epochs=1,
    print_every_n=50,
    validate_every_n=100,
    num_views=8,  # Num views from total 24 views (stride = 24 / num_views)
    random_start_view=True,  # Set to False to get views start from idx 0
    freezee_backbone=True,  # Set to False to finetune the backbone
)
train_mvcnn.main(config)

Using device: cuda:0
[000/00049] train_loss: 3.228
[000/00099] train_loss: 2.765
Num batch val 273
[000/00099] 2022-01-08 16:32:28.901943 val_loss: 0.082, val_accuracy: 98.238%
[000/00149] train_loss: 2.777
[000/00199] train_loss: 2.682
Num batch val 273
[000/00199] 2022-01-08 16:32:50.164934 val_loss: 0.095, val_accuracy: 97.483%
[000/00249] train_loss: 2.736
[000/00299] train_loss: 2.664
Num batch val 273
[000/00299] 2022-01-08 16:33:11.265362 val_loss: 0.126, val_accuracy: 97.049%
[000/00349] train_loss: 2.686
[000/00399] train_loss: 2.720
Num batch val 273
[000/00399] 2022-01-08 16:33:32.332027 val_loss: 0.136, val_accuracy: 97.278%
[000/00449] train_loss: 2.695
[000/00499] train_loss: 2.718
Num batch val 273
[000/00499] 2022-01-08 16:33:53.131767 val_loss: 0.135, val_accuracy: 97.163%
[000/00549] train_loss: 2.682
[000/00599] train_loss: 2.716
Num batch val 273
[000/00599] 2022-01-08 16:34:14.277853 val_loss: 0.115, val_accuracy: 97.506%
[000/00649] train_loss: 2.717
[000/00699] t

#### Verify very high val accuracy

### (d) Predict and Visualize

In [35]:
# Build an inference handler from the best checkpoint of the 8-view run.
from mvcnn_rec.inference.infer_mvcnn import InferenceHandlerMVCNN
ckpt = 'mvcnn_rec/runs/mvcnn_generalize_8views_from_1view/model_best.ckpt'
inferer = InferenceHandlerMVCNN(ckpt)

In [54]:
# Rebuild the validation split with 8 views per shape and a random start view.
# This rebinds `val_dataset`, replacing the 12-view instance created in the Dataset section.
val_dataset = ShapeNetMultiview('val', total_views=24, num_views=8, 
                                load_mode='mvcnn_rec', # Change to mvcnn to get only images and labels
                                random_start_view=True)
# Last expression of the cell: the number of validation samples is shown as the output.
len(val_dataset)

4371

In [55]:
# Pick a random validation sample, print its metadata and render its voxel grid.
# No random seed is set here, so a different sample is drawn on every run.
idx = np.random.randint(0, len(val_dataset))
sample = val_dataset[idx]

print(f'Name: {sample["name"]}')
print(f'Images Dimensions: {sample["item"].shape}')
print(f'Voxel Dimensions: {sample["voxel"].shape}')
# id_class_mapping translates the integer label into a class name.
print(f'Label: {sample["label"]}, Name: {ShapeNetMultiview.id_class_mapping[sample["label"]]}')

# Relies on `visualize_occupancy` having been imported by the earlier visualization cell.
visualize_occupancy(sample["voxel"].squeeze(), flip_axes=True)

Name: 03001627/a1734a851af178bee15475f0b1eb22aa
Images Dimensions: torch.Size([8, 3, 224, 224])
Voxel Dimensions: (1, 32, 32, 32)
Label: 4, Name: chair


Output()

#### Predict

In [56]:
# Classify the sampled shape from its view images; the result is shown as the cell output.
inferer.infer_single(sample["item"])

'chair'

## TODO: 2. Multiview CNN for Classification and Reconstruction

In [21]:
# Build the joint classification + reconstruction model and print its parameter summary.
# Relies on `summarize_model` having been imported by the earlier MVCNN model cell.
# NOTE(review): MVCNNClassification is imported but not used in this cell.
from mvcnn_rec.model.mvcnn_rec import MVCNNClassification, MVCNNReconstruction

mvcnn_reconstruct = MVCNNReconstruction()
print(summarize_model(mvcnn_reconstruct))  # Expected: Rows 0-71 and TOTAL = 12702524

    | Name                                     | Type                | Params  
-------------------------------------------------------------------------------------
0   | encoder_image                            | ResNet              | 11689512
1   | encoder_image.conv1                      | Conv2d              | 9408    
2   | encoder_image.bn1                        | BatchNorm2d         | 128     
3   | encoder_image.relu                       | ReLU                | 0       
4   | encoder_image.maxpool                    | MaxPool2d           | 0       
5   | encoder_image.layer1                     | Sequential          | 147968  
6   | encoder_image.layer1.0                   | BasicBlock          | 73984   
7   | encoder_image.layer1.0.conv1             | Conv2d              | 36864   
8   | encoder_image.layer1.0.bn1               | BatchNorm2d         | 128     
9   | encoder_image.layer1.0.relu              | ReLU                | 0       
10  | encoder_image.layer1.0.conv2

In [48]:
# Smoke test of the reconstruction model on a random views-first batch.
# Requires the cell that defines `mvcnn_reconstruct` to have been run first.
# NOTE(review): images here are 222x222 while the dataset samples are 224x224 - confirm this is intended.
multi_images = torch.randn(24, 5, 3, 222, 222) * 2. - 1. # Suppose 24 images per shape, 5 shapes
pred_voxels, pred_classes = mvcnn_reconstruct(multi_images)

# NOTE(review): the recorded expectation below assumes a batch of 1 and 12 classes, whereas the input
# holds 5 shapes and the classifier above was built with 13 classes - confirm the true shapes.
print('Output tensor shape: ', pred_voxels.shape, pred_classes.shape)  # Expected (as recorded): torch.Size([1, 32, 32, 32]) torch.Size([1, 12])

NameError: name 'mvcnn_reconstruct' is not defined

### (b) Training script and overfit one shape

In [None]:
# Overfitting sanity check for the classification + reconstruction training script.
from mvcnn_rec.training import train_mvcnn_rec
config = {
    'experiment_name': 'mvcnn_rec_overfitting',
    'device': 'cuda:0',  # change this to cpu if you do not have a GPU
    'is_overfit': True,
    'batch_size': 4,
    'resume_ckpt': None,
    'learning_rate': 0.001,
    'max_epochs': 20,
    'print_every_n': 10,
    'validate_every_n': 25,
}
# NOTE(review): unlike the MVCNN configs, no 'num_views' key is set here - confirm the script has a default.
# Call the module imported in this cell. The previous call went to `train_mvcnn`, which is
# the classification-only trainer and is not imported here.
train_mvcnn_rec.main(config)  # should be able to get <0.0025 train_loss and <0.13 val_loss

### (c) Training over the entire training set

In [None]:
# Train the classification + reconstruction model over the entire training set.
# This cell belongs to the reconstruction section, so it uses the reconstruction trainer
# (`train_mvcnn_rec`) rather than the classification-only `train_mvcnn`.
from mvcnn_rec.training import train_mvcnn_rec
config = {
    'experiment_name': 'mvcnn_rec_generalize',
    'device': 'cuda:0',  # change this to cpu if you do not have a GPU
    'is_overfit': False,  # full training set; True would repeat the overfitting sanity check
    'batch_size': 4,
    'resume_ckpt': None,
    'learning_rate': 0.001,
    'max_epochs': 20,
    'print_every_n': 10,
    'validate_every_n': 25,
}
# NOTE(review): the loss targets previously quoted here were copied from the overfitting cell
# and do not apply to a full-dataset run; record real targets once this has been trained.
train_mvcnn_rec.main(config)

### (d) Inference and visualize