# **Import required packages**


In [1]:
import ipywidgets as widgets
import numpy as np
import math
import random
import os
# import scipy.spatial.distance
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import plotly.graph_objects as go
import plotly.express as px

# **Mount Google drive**

In [None]:
# Colab-only cell: mount Google Drive at /content/drive.
# NOTE(review): train() writes checkpoints under "./drive/My Drive/point_net_models/",
# which presumably resolves to this mount when the working directory is /content — confirm.
from google.colab import drive
from google.colab import files
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Download dataset**

In [2]:
!wget http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip
!unzip -q ModelNet10.zip

--2020-12-15 09:16:28--  http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip
Resolving 3dvision.princeton.edu (3dvision.princeton.edu)... 128.112.136.61
Connecting to 3dvision.princeton.edu (3dvision.princeton.edu)|128.112.136.61|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 473402300 (451M) [application/zip]
Saving to: ‘ModelNet10.zip’


2020-12-15 09:16:58 (15.3 MB/s) - ‘ModelNet10.zip’ saved [473402300/473402300]



In [16]:
def read_off(file):
    """Parse a mesh in Object File Format (OFF) from an open text file.

    Args:
        file: Text-mode file object positioned at the start of the OFF data.

    Returns:
        (verts, faces): ``verts`` is a list of per-vertex float lists and
        ``faces`` is a list of per-face vertex-index lists (the leading
        "number of vertices in this face" field of each face line is dropped).

    Raises:
        ValueError: If the first line does not start with ``OFF``.
    """
    header = file.readline().strip()
    if not header.startswith('OFF'):
        raise ValueError('Not a valid OFF header')

    # Some files fuse the counts onto the header line ("OFF8 6 0"); otherwise
    # the counts are on the following line.
    counts = header[3:].split()
    if not counts:
        counts = file.readline().split()
    n_verts, n_faces = int(counts[0]), int(counts[1])

    # split() without an argument tolerates tabs and repeated spaces.
    verts = [[float(s) for s in file.readline().split()] for _ in range(n_verts)]
    faces = [[int(s) for s in file.readline().split()][1:] for _ in range(n_faces)]
    return verts, faces

def visualize(data):
    """Build a plotly Figure for ``data`` with a single 'buttons' updatemenu.

    Args:
        data: Trace or list of traces passed straight to ``go.Figure``.

    Returns:
        The constructed ``go.Figure`` (not yet shown).
    """
    buttons_menu = dict(
        type='buttons',
        showactive=False,
        x=0.8,
        y=1,
        xanchor='left',
        yanchor='bottom',
        pad={'t': 45, 'r': 10},
    )
    layout = go.Layout(updatemenus=[buttons_menu])
    return go.Figure(data=data, layout=layout)

In [20]:
class PC:
    """A mesh plus a point cloud sampled from its surface.

    Attributes:
        verts: Vertex coordinates as given to the constructor.
        faces: Per-face vertex-index lists as given to the constructor.
        pointcloud: ``(output_size, 3)`` NumPy array of sampled points; it is
            created by ``points_sample`` and modified in place by ``normalize``,
            ``rand_rotate`` and ``rand_noise``.
    """

    def __init__(self, verts, faces):
        """Store the mesh and immediately draw a default-sized surface sample."""
        self.verts = verts
        self.faces = faces
        self.points_sample()

    def points_sample(self, output_size=1024):
        """Sample ``output_size`` points on the mesh surface, weighted by face area.

        Faces are chosen with probability proportional to their true 3D area
        (half the norm of the edge cross product), so faces of any orientation
        are represented. Only the first three indices of each face are used.

        Raises:
            ValueError: If the mesh has no faces or zero total surface area.
        """
        verts = np.asarray(self.verts, dtype=float)
        tris = np.array([face[:3] for face in self.faces], dtype=int).reshape(-1, 3)
        a, b, c = verts[tris[:, 0]], verts[tris[:, 1]], verts[tris[:, 2]]

        # 3D triangle area; a 2D (x, y) formula would give vertical faces zero weight.
        areas = 0.5 * np.linalg.norm(np.cross(b - a, c - a), axis=1)
        if not areas.sum() > 0:
            raise ValueError('Mesh has no surface area to sample from')

        picked = random.choices(range(len(tris)), weights=areas.tolist(), k=output_size)
        sample_points = np.zeros((output_size, 3))
        for row, face_idx in enumerate(picked):
            # Sorting two uniforms gives barycentric weights (s, t-s, 1-t) that
            # are uniformly distributed over the triangle.
            s, t = sorted([random.random(), random.random()])
            sample_points[row] = (s * a[face_idx]
                                  + (t - s) * b[face_idx]
                                  + (1 - t) * c[face_idx])
        self.pointcloud = sample_points

    def normalize(self):
        """Center the cloud on its mean and scale it so the farthest point has norm 1."""
        centered = self.pointcloud - np.mean(self.pointcloud, axis=0)
        radius = np.max(np.linalg.norm(centered, axis=1))
        # All points coincide -> nothing to scale; avoid a 0/0 that yields NaNs.
        if radius > 0:
            centered = centered / radius
        self.pointcloud = centered

    def rand_rotate(self):
        """Rotate the cloud about the z axis by a uniformly random angle in [0, 2*pi)."""
        theta = random.random() * 2. * math.pi
        c, s = math.cos(theta), math.sin(theta)
        m = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
        self.pointcloud = m.dot(self.pointcloud.T).T

    def rand_noise(self):
        """Add zero-mean Gaussian jitter (standard deviation 0.02) to every coordinate."""
        self.pointcloud = self.pointcloud + np.random.normal(0, 0.02, (self.pointcloud.shape))

    def tensor(self):
        """Return the current point cloud as a torch tensor (shares memory with the array)."""
        return torch.from_numpy(self.pointcloud)

    def show_mesh(self):
        """Display the mesh as a translucent surface with a fixed camera."""
        # Use the first three indices per face, consistent with points_sample.
        i, j, k = np.array([face[:3] for face in self.faces]).T
        x, y, z = np.array(self.verts).T
        data = [go.Mesh3d(x=x, y=y, z=z, color='lightpink', opacity=0.50, i=i, j=j, k=k)]
        fig = visualize(data)
        camera = dict(
            up=dict(x=0, y=0, z=1),
            center=dict(x=0, y=0, z=0),
            eye=dict(x=5.25, y=5.25, z=1.25)
        )
        fig.update_layout(scene_camera=camera)

        fig.show()

    def show_scatter(self):
        """Display the mesh vertices (not the sampled cloud) as a 3D scatter plot."""
        x, y, z = np.array(self.verts).T
        data = [go.Scatter3d(x=x, y=y, z=z, mode='markers')]
        fig = visualize(data)
        fig.show()

    def show_point_cloud(self):
        """Display the current sampled point cloud as small outlined markers."""
        p = self.pointcloud
        data = [go.Scatter3d(x=p[:, 0], y=p[:, 1], z=p[:, 2], mode='markers')]
        fig = visualize(data).update_traces(
                                        marker=dict(size=2,
                                        line=dict(width=2, color='DarkSlateGrey')),
                                        selector=dict(mode='markers'))
        fig.show()

In [5]:
class PCDataset:
    """Map-style dataset of point clouds sampled from ``.off`` meshes on disk.

    Expected layout: ``root_dir/<category>/<folder>/*.off``. Every directory
    directly under ``root_dir`` is treated as a category; class indices follow
    the sorted category names.

    Args:
        root_dir: Dataset root containing one sub-directory per category.
        folder: Split sub-directory to read inside each category.
        augment: When True (default, the original behaviour) each sample is
            randomly rotated and jittered after normalization. Pass False to
            get only the normalized sample, e.g. for evaluation data.
    """

    def __init__(self, root_dir, folder="train", augment=True):
        self.root_dir = root_dir
        self.folder = folder
        self.augment = augment
        self.classes = self.get_categories()
        self.files = []

        for category in self.classes:
            split_dir = os.path.join(root_dir, category, folder)
            # os.listdir order is arbitrary; sort so index -> file is reproducible.
            for fn in sorted(os.listdir(split_dir)):
                if fn.endswith('.off'):
                    self.files.append(dict(fn=os.path.join(split_dir, fn), category=category))

    def __getitem__(self, i):
        """Load mesh ``i``, sample a point cloud from it and return it with its label.

        Returns:
            dict with ``'pointcloud'`` (tensor from ``PC.tensor()``) and
            ``'category'`` (integer class index).
        """
        entry = self.files[i]
        with open(entry['fn'], 'r') as f:
            verts, faces = read_off(f)

        # PC samples its point cloud in the constructor.
        pointcloud = PC(verts, faces)
        pointcloud.normalize()
        if self.augment:
            pointcloud.rand_rotate()
            pointcloud.rand_noise()

        return {'pointcloud': pointcloud.tensor(),
                'category': self.classes[entry['category']]}

    def __len__(self):
        """Number of ``.off`` files found for the selected split."""
        return len(self.files)

    def get_categories(self):
        """Map each sub-directory name of ``root_dir`` (sorted) to a class index."""
        names = [name for name in sorted(os.listdir(self.root_dir))
                 if os.path.isdir(os.path.join(self.root_dir, name))]
        return {name: idx for idx, name in enumerate(names)}


# **Visualize Data**

In [6]:
# Read one training chair mesh and wrap it in a PC (which samples a point cloud
# on construction) for the visualization cells that follow.
root_dir = "./ModelNet10"
with open(os.path.join(root_dir, "chair/train/chair_0001.off")) as f:
    verts, faces = read_off(f)
p = PC(verts, faces)

In [None]:
# picker = widgets.Dropdown(options=['mesh', 'scatter', 'point_cloud'], value='mesh')
# picker

In [21]:
# Render the loaded mesh (vertices + faces) as a translucent surface.
p.show_mesh()

In [None]:
# Plot only the mesh vertices as a 3D scatter (not the sampled point cloud).
p.show_scatter()

In [None]:
# Draw a fresh surface sample, then apply the same steps PCDataset.__getitem__
# uses (normalize -> random z rotation -> Gaussian jitter) and plot the result.
# Each call replaces p.pointcloud, so the order of these lines matters.
p.points_sample()
p.normalize()
p.rand_rotate()
p.rand_noise()
p.show_point_cloud()

In [None]:
# Build the train / validation datasets and their loaders.
train_ds = PCDataset('./ModelNet10', folder="train")
valid_ds = PCDataset('./ModelNet10', folder="test")

train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=64)

# Invert the name -> index mapping so class indices can be printed as names.
classes = {i: cat for cat, i in train_ds.classes.items()}
print('\n'.join("{}: {}".format(i, classes[i]) for i in classes))
print('\n')
print('Train dataset size: ', len(train_ds))
print('Valid dataset size: ', len(valid_ds))
print('Number of classes: ', len(train_ds.classes))

# Fetch the first sample once: every train_ds[0] access re-reads the mesh from
# disk and re-samples it, so indexing twice doubles the work.
first_sample = train_ds[0]
print('Sample pointcloud shape: ', first_sample['pointcloud'].size())
print('Class: ', classes[first_sample['category']])

0: bathtub
1: bed
2: chair
3: desk
4: dresser
5: monitor
6: night_stand
7: sofa
8: table
9: toilet


Train dataset size:  3991
Valid dataset size:  908
Number of classes:  10
Sample pointcloud shape:  torch.Size([1024, 3])
Class:  bathtub


In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    """Transformation network predicting one k x k matrix per input sample.

    Three pointwise Conv1d+BatchNorm stages lift each point to 1024 features,
    a global max over the point axis pools them, and three fully connected
    layers regress ``k*k`` values that are added to the identity matrix.

    Args:
        k: Number of input channels, and the side length of the output matrix.
    """

    def __init__(self, k=3):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv1d(k,64,1), nn.BatchNorm1d(64))
        self.conv2 = nn.Sequential(nn.Conv1d(64,128,1), nn.BatchNorm1d(128))
        self.conv3 = nn.Sequential(nn.Conv1d(128,1024,1), nn.BatchNorm1d(1024))
        self.fc1 = nn.Sequential(nn.Linear(1024,512), nn.BatchNorm1d(512))
        self.fc2 = nn.Sequential(nn.Linear(512,256), nn.BatchNorm1d(256))
        self.fc3 = nn.Linear(256, k*k)
        self.k = k

    def forward(self, input):
        """Compute the transform matrices.

        Args:
            input: Tensor of shape ``(batch, k, n_points)``.

        Returns:
            Tensor of shape ``(batch, k, k)`` on the same device as ``input``.
        """
        self.conv1_out = F.relu(self.conv1(input))
        self.conv2_out = F.relu(self.conv2(self.conv1_out))
        self.conv3_out = F.relu(self.conv3(self.conv2_out))

        # Global max over the point axis -> (batch, 1024). Unlike a fixed
        # MaxPool1d(1024), this works for any number of points.
        pooled = torch.max(self.conv3_out, dim=2)[0]
        self.fc1_out = F.relu(self.fc1(pooled))
        self.fc2_out = F.relu(self.fc2(self.fc1_out))

        # The FC head predicts an offset from the identity. The identity is a
        # constant (no grad) created on the activations' device so the module
        # runs on CPU and on any GPU; it broadcasts over the batch dimension.
        offset = self.fc3(self.fc2_out).view(-1, self.k, self.k)
        identity = torch.eye(self.k, dtype=offset.dtype, device=offset.device)
        matrix = offset + identity
        return matrix


class PointNet(nn.Module):
    """Point cloud classifier with learned input and feature transforms.

    Args:
        classes: Number of output classes.
    """

    def __init__(self, classes = 10):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Sequential(nn.Conv1d(3,64,1), nn.BatchNorm1d(64))
        self.conv2 = nn.Sequential(nn.Conv1d(64,128,1), nn.BatchNorm1d(128))
        self.conv3 = nn.Sequential(nn.Conv1d(128,1024,1), nn.BatchNorm1d(1024))

        self.fc1 = nn.Sequential(nn.Linear(1024, 512), nn.BatchNorm1d(512))
        self.fc2 = nn.Sequential(nn.Linear(512, 256), nn.Dropout(p=0.3), nn.BatchNorm1d(256))
        self.fc3 = nn.Sequential(nn.Linear(256, classes), nn.LogSoftmax(dim=1))

    def forward(self, input):
        """Classify a batch of point clouds.

        Args:
            input: Tensor of shape ``(batch, 3, n_points)``.

        Returns:
            Tuple ``(log_probs, matrix3x3, matrix64x64)``: per-class
            log-probabilities of shape ``(batch, classes)`` and the two
            predicted transform matrices (returned so the loss can regularize
            them).
        """
        # Apply the learned 3x3 transform to every point.
        matrix3x3 = self.input_transform(input)
        trans_input = torch.bmm(torch.transpose(input,1,2), matrix3x3).transpose(1,2)
        self.conv1_out = F.relu(self.conv1(trans_input))

        # Apply the learned 64x64 transform to the per-point features.
        matrix64x64 = self.feature_transform(self.conv1_out)
        trans_feature = torch.bmm(torch.transpose(self.conv1_out,1,2), matrix64x64).transpose(1,2)
        self.conv2_out = F.relu(self.conv2(trans_feature))
        self.conv3_out = self.conv3(self.conv2_out)

        # Global max over the point axis -> (batch, 1024). Unlike a fixed
        # MaxPool1d(1024), this works for any number of points.
        self.trans_output = torch.max(self.conv3_out, dim=2)[0]

        self.fc1_out = F.relu(self.fc1(self.trans_output))
        self.fc2_out = F.relu(self.fc2(self.fc1_out))
        self.output = self.fc3(self.fc2_out)
        return self.output, matrix3x3, matrix64x64

In [None]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    """Negative log-likelihood loss plus an orthogonality penalty on both transforms.

    The penalty is ``alpha * (||I - A A^T|| + ||I - B B^T||) / batch_size`` where
    each norm is taken over the whole batched tensor.

    Args:
        outputs: Log-probabilities of shape ``(batch, classes)``.
        labels: Integer class indices of shape ``(batch,)``.
        m3x3: Input transform matrices, shape ``(batch, 3, 3)``.
        m64x64: Feature transform matrices, shape ``(batch, 64, 64)``.
        alpha: Weight of the orthogonality penalty.

    Returns:
        Scalar loss tensor.
    """
    criterion = torch.nn.NLLLoss()
    bs = outputs.size(0)

    # Constant identities created on each matrix's own device and dtype (works on
    # CPU and on any GPU index); they broadcast over the batch dimension.
    id3x3 = torch.eye(3, dtype=m3x3.dtype, device=m3x3.device)
    id64x64 = torch.eye(64, dtype=m64x64.dtype, device=m64x64.device)

    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)


def train(train_loader, val_loader=None,  epochs=15, save=True,
          save_dir="./drive/My Drive/point_net_models"):
    """Train a fresh PointNet and return it.

    Args:
        train_loader: Iterable of batches, each a dict with ``'pointcloud'``
            (``(batch, n_points, 3)``) and ``'category'`` (class indices).
        val_loader: Optional loader of the same batch format; when given,
            accuracy on it is printed after every epoch.
        epochs: Number of passes over ``train_loader``.
        save: When True, write the model's ``state_dict`` after every epoch.
        save_dir: Directory for the ``save_<epoch>.pth`` checkpoints. It is
            created if it does not exist.

    Returns:
        The trained ``PointNet`` (on the GPU when one is available, else CPU).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pointnet = PointNet()
    pointnet.to(device)
    optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

    if save:
        # Create the target up front so a missing directory fails (or is fixed)
        # before an epoch of training is spent, not at the first torch.save.
        os.makedirs(save_dir, exist_ok=True)

    for epoch in range(epochs):
        pointnet.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
            optimizer.zero_grad()
            # The network expects (batch, 3, n_points).
            outputs, m3x3, m64x64 = pointnet(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                    print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                        (epoch + 1, i + 1, len(train_loader), running_loss / 10))
                    running_loss = 0.0

        # validation
        if val_loader is not None:
            pointnet.eval()
            correct = total = 0
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
                    outputs, __, __ = pointnet(inputs.transpose(1,2))
                    predicted = outputs.argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            # An empty loader yields no samples; skip rather than divide by zero.
            if total:
                val_acc = 100. * correct / total
                print('Valid accuracy: %d %%' % val_acc)

        # save the model
        if save:
            torch.save(pointnet.state_dict(), os.path.join(save_dir, "save_"+str(epoch)+".pth"))

    return pointnet

In [None]:
# Train with the default number of epochs (15), reporting validation accuracy
# after each epoch and saving a checkpoint per epoch.
train(train_loader, valid_loader, save=True)

[Epoch: 1, Batch:   10 /  125], loss: 2.155
[Epoch: 1, Batch:   20 /  125], loss: 1.684
[Epoch: 1, Batch:   30 /  125], loss: 1.575
[Epoch: 1, Batch:   40 /  125], loss: 1.492
[Epoch: 1, Batch:   50 /  125], loss: 1.252
[Epoch: 1, Batch:   60 /  125], loss: 1.201
[Epoch: 1, Batch:   70 /  125], loss: 1.134
[Epoch: 1, Batch:   80 /  125], loss: 1.013
[Epoch: 1, Batch:   90 /  125], loss: 1.015
[Epoch: 1, Batch:  100 /  125], loss: 0.957
[Epoch: 1, Batch:  110 /  125], loss: 0.927
[Epoch: 1, Batch:  120 /  125], loss: 0.957
Valid accuracy: 56 %
[Epoch: 2, Batch:   10 /  125], loss: 0.940
[Epoch: 2, Batch:   20 /  125], loss: 0.954
[Epoch: 2, Batch:   30 /  125], loss: 0.782
[Epoch: 2, Batch:   40 /  125], loss: 0.854
[Epoch: 2, Batch:   50 /  125], loss: 0.803
[Epoch: 2, Batch:   60 /  125], loss: 0.677
[Epoch: 2, Batch:   70 /  125], loss: 0.816
[Epoch: 2, Batch:   80 /  125], loss: 0.771
[Epoch: 2, Batch:   90 /  125], loss: 0.682
[Epoch: 2, Batch:  100 /  125], loss: 0.754
[Epoch: 2, 

KeyboardInterrupt: ignored