## Requirements

In [None]:
import os
import random

import pandas as pd
import open3d as o3d
from joblib import load, dump

import torch
from scipy.special import softmax
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import nn
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

from sklearn.metrics import confusion_matrix

# Seed Python's built-in RNG. `random.seed` must be *called*; assigning to it
# would replace the function with an int and seed nothing.
# NOTE(review): NumPy and PyTorch keep their own RNG state and are not seeded here.
random.seed(42)

### MODEL

In [None]:
# T-Net: regresses a k x k transformation matrix used to align the input points (k=3) or point features (k=64)

class Tnet(nn.Module):
    """T-Net: regress a ``k x k`` transformation matrix from a set of points.

    Per-point features are computed with 1x1 convolutions, max-pooled over
    the point axis, and passed through fully connected layers. The network
    predicts an offset from the identity matrix.

    Args:
        k: Number of input channels; also the side length of the predicted
            square matrix.
    """

    def __init__(self, k=3):
        super().__init__()
        self.k = k
        # Layer creation order is kept as-is so seeded initialisation is unchanged.
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, input):
        """Predict one ``k x k`` matrix per batch element.

        Args:
            input: Tensor of shape ``(batch, k, n_points)``; channels come
                first because ``conv1`` is ``Conv1d(k, 64, 1)``.

        Returns:
            Tensor of shape ``(batch, k, k)``.
        """
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))
        # Max over the whole point axis, then drop the singleton dimension.
        pooled = F.max_pool1d(xb, xb.size(-1))
        flat = torch.flatten(pooled, 1)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # The prediction is an offset from the identity. The identity is a
        # constant, so it needs no gradient, and it is created directly on the
        # activations' device/dtype (works on any GPU index, not only the
        # current CUDA device). Broadcasting replaces the per-batch repeat.
        identity = torch.eye(self.k, dtype=xb.dtype, device=xb.device)
        matrix = self.fc3(xb).view(-1, self.k, self.k) + identity
        return matrix

# Feature extractor: aligns the input points (3x3 T-Net) and the per-point
#  features (64x64 T-Net), then max-pools them into one 1024-dim global vector
class Transform(nn.Module):
    """PointNet backbone producing a global feature vector per point cloud.

    The input is aligned with a learned 3x3 matrix, lifted to 64 channels,
    aligned again with a learned 64x64 matrix, lifted to 1024 channels and
    max-pooled over the point axis.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are created in the original order so that seeded
        # parameter initialisation is unaffected.
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3, 64, 1)

        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        """Return ``(global_features, matrix3x3, matrix64x64)``.

        ``input`` is expected channels-first, i.e. ``(batch, 3, n_points)``,
        as required by ``conv1``.
        """
        # Align the raw points with the predicted 3x3 matrix (batched matmul).
        matrix3x3 = self.input_transform(input)
        aligned_points = torch.bmm(input.transpose(1, 2), matrix3x3).transpose(1, 2)

        point_feats = F.relu(self.bn1(self.conv1(aligned_points)))

        # Align the 64-channel features with the predicted 64x64 matrix.
        matrix64x64 = self.feature_transform(point_feats)
        aligned_feats = torch.bmm(point_feats.transpose(1, 2), matrix64x64).transpose(1, 2)

        point_feats = F.relu(self.bn2(self.conv2(aligned_feats)))
        point_feats = self.bn3(self.conv3(point_feats))

        # Max over all points, then flatten to (batch, 1024).
        pooled = F.max_pool1d(point_feats, kernel_size=point_feats.size(-1))
        output = torch.flatten(pooled, start_dim=1)
        return output, matrix3x3, matrix64x64

# Classifier
class PointNet(nn.Module):
    """PointNet classifier head on top of the ``Transform`` backbone.

    Args:
        classes: Number of output classes.
    """

    def __init__(self, classes = 2):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """Classify a batch of point clouds.

        Args:
            input: Tensor of size ``(batch_size, 3, sample_rate)``.

        Returns:
            Tuple ``(log_probs, matrix3x3, matrix64x64)``: per-class
            log-probabilities plus the two alignment matrices from the backbone.
        """
        # The backbone yields a (batch_size, 1024) global feature vector.
        global_feat, matrix3x3, matrix64x64 = self.transform(input)

        hidden = F.relu(self.bn1(self.fc1(global_feat)))

        # Dropout is applied to the fc2 output before batch-norm and ReLU.
        hidden = self.fc2(hidden)
        hidden = self.dropout(hidden)
        hidden = F.relu(self.bn2(hidden))

        logits = self.fc3(hidden)
        log_probs = self.logsoftmax(logits)
        return log_probs, matrix3x3, matrix64x64

### PointNet loss

In [None]:
# PointNet loss computed from the three model outputs (log-probabilities and the two T-Net matrices)

# LOSS
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    """Negative log-likelihood loss plus an orthogonality penalty on the T-Net matrices.

    Args:
        outputs: Log-probabilities of shape ``(batch, classes)``.
        labels: Integer class indices of shape ``(batch,)``.
        m3x3: Batch of predicted 3x3 matrices, shape ``(batch, 3, 3)``.
        m64x64: Batch of predicted 64x64 matrices, shape ``(batch, 64, 64)``.
        alpha: Weight of the orthogonality penalty.

    Returns:
        Scalar tensor: ``NLL + alpha * (||I - A A^T|| + ||I - B B^T||) / batch``,
        where each norm is taken over the whole batch tensor.
    """
    criterion = torch.nn.NLLLoss()
    bs = outputs.size(0)
    # The identities are constants: no gradient is needed, and they are built
    # directly on the matrices' device/dtype instead of being moved with
    # `.cuda()` (which always targets the current CUDA device). Broadcasting
    # over the batch axis replaces the explicit repeat.
    id3x3 = torch.eye(3, dtype=m3x3.dtype, device=m3x3.device)
    id64x64 = torch.eye(64, dtype=m64x64.dtype, device=m64x64.device)
    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))
    penalty = (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)
    return criterion(outputs, labels) + alpha * penalty

In [None]:
# Normalization used as a preprocessing step
class Normalize(object):
    """Centre a point cloud on its mean and scale it into the unit sphere."""

    def __call__(self, pointcloud):
        """Return a normalised copy of ``pointcloud``.

        Args:
            pointcloud: 2-D NumPy array with one point per row.

        Returns:
            New array with zero mean per column; the farthest point has norm 1
            unless all points coincide, in which case the centred (all-zero)
            cloud is returned unscaled.
        """
        assert len(pointcloud.shape)==2

        # Subtraction allocates a new array, so the caller's data is untouched.
        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)
        max_radius = np.max(np.linalg.norm(norm_pointcloud, axis=1))
        # A degenerate cloud (all points identical) has radius 0; dividing by
        # it would fill the result with NaNs that then poison the model.
        if max_radius > 0:
            norm_pointcloud /= max_radius

        return norm_pointcloud

def default_transforms():
    """Build the default preprocessing pipeline (currently only ``Normalize``)."""
    steps = [Normalize()]
    return transforms.Compose(steps)

class PointCloudData(Dataset):
    """Dataset of point-cloud files listed in a CSV, down-sampled on load.

    The CSV must provide an ``object_path`` column (file readable by Open3D)
    and a ``label`` column holding ``"anomaly"`` or ``"normal"``.

    Args:
        dataframe_path: Path of the CSV describing the samples.
        valid: Accepted for API compatibility; not used by this class.
        sample_rate: Number of points kept per cloud.
        transform: Callable applied to the ``(n, 3)`` NumPy array of points,
            or a falsy value to skip preprocessing.
    """

    def __init__(self, dataframe_path, valid=False, sample_rate=1024, transform=default_transforms()):
        # Get data
        self.df = pd.read_csv(dataframe_path)
        # Label string -> class index
        self.classes = {"anomaly": 1, "normal": 0}
        self.sample_rate = sample_rate
        self.transforms = transform

    def __len__(self):
        return self.df.shape[0]

    def __preproc__(self, file):
        """Load ``file``, down-sample it and return the points as a tensor."""
        point_cloud = o3d.io.read_point_cloud(file)
        # Cloud is reduced to a fixed size for ingestion into the model.
        resampled = point_cloud.farthest_point_down_sample(num_samples=self.sample_rate)
        # Start from the raw points so the result is defined even when no
        # transform is configured (previously an UnboundLocalError).
        pointcloud = np.array(resampled.points)
        if self.transforms:
            pointcloud = self.transforms(pointcloud)
        return torch.from_numpy(pointcloud)

    def __getitem__(self, idx):
        """Return ``{'pointcloud': tensor, 'category': int}`` for row ``idx``."""
        row = self.df.iloc[idx]  # single row lookup instead of one per column
        pcd_path = os.path.abspath(row['object_path'])
        pointcloud = self.__preproc__(pcd_path)
        return {'pointcloud': pointcloud,
                'category': self.classes[row['label']]}



In [None]:
# Load datasets generated in Random Forest notebook
# Read the train and test split tables in one pass over their paths.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/train_df.csv", "./data/test_df.csv")
)

### Training

In [None]:
# Model parameters
batch_size = 32  # samples per DataLoader batch
learning_rate = 0.0001  # Adam learning rate; train() also forwards it as `alpha` to pointnetloss
sample_rate = 1024  # number of points kept per cloud by the dataset
epochs = 10  # training epochs; evaluation loops over the same range

In [None]:
# Prefer the first CUDA GPU; fall back to the CPU when none is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Training data: shuffled batches of farthest-point down-sampled clouds.
train_ds = PointCloudData("./data/train_df.csv", sample_rate=sample_rate)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Model on the selected device, optimised with Adam.
pointnet = PointNet().to(device)
optimizer = torch.optim.Adam(params=pointnet.parameters(), lr=learning_rate)

### We use the below function to train both versions of PointNet

In [None]:
def train(model, train_loader, learning_rate, epochs=5, model_name="",
          save=True, optimizer=None, device=None):
    """Train ``model`` with the PointNet loss and checkpoint it after every epoch.

    Args:
        model: Network returning ``(log_probs, m3x3, m64x64)`` for an input of
            shape ``(batch, 3, n_points)``.
        train_loader: Iterable of batches shaped like
            ``{'pointcloud': (batch, n_points, 3), 'category': (batch,)}``.
        learning_rate: Learning rate for the default Adam optimizer. It is
            also forwarded as ``alpha`` (regularisation weight) to
            ``pointnetloss``, as the original implementation did.
        epochs: Number of passes over ``train_loader``.
        model_name: Prefix of the checkpoint files
            ``./model_weights/{model_name}_{epoch}.pth``.
        save: Write a checkpoint after each epoch. Defaults to ``True``,
            matching the previous unconditional save; existing callers that
            pass ``save=True`` now work instead of raising ``TypeError``.
        optimizer: Optimizer bound to ``model``'s parameters. When omitted, an
            Adam optimizer with ``learning_rate`` is created here rather than
            relying on a global ``optimizer`` that may belong to another model.
        device: Device for the batches. Defaults to the device of ``model``'s
            parameters.
    """
    if device is None:
        device = next(model.parameters()).device
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    if save:
        # Fail early (and cheaply) rather than after a full epoch of training.
        os.makedirs("./model_weights", exist_ok=True)

    num_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            pcd = data['pointcloud'].to(device).float()
            labels = data['category'].to(device)
            optimizer.zero_grad()

            # The model expects channels first: (batch, 3, n_points).
            outputs, m3x3, m64x64 = model(pcd.transpose(1, 2))
            # NOTE(review): alpha is tied to the learning rate here; confirm
            # this coupling is intended (both default to 0.0001 in this file).
            loss = pointnetloss(outputs, labels, m3x3, m64x64, alpha=learning_rate)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                      (epoch + 1, i + 1, num_batches, running_loss / 10))
                running_loss = 0.0

        if save:
            torch.save(model.state_dict(), f"./model_weights/{model_name}_{epoch}.pth")

In [None]:
# train() as defined in this notebook has no `save` parameter, so passing
# save=True raised TypeError; a checkpoint is written after every epoch anyway.
train(pointnet, train_loader, learning_rate, epochs=epochs, model_name="PointNet")

### Model evaluation

Here we evaluate the model for each epoch to select best results

In [None]:
# Evaluate the checkpoint saved after each training epoch on the test split
# and print one confusion matrix per epoch.
test_ds = PointCloudData("./data/test_df.csv", valid=True)
test_loader = DataLoader(dataset=test_ds, batch_size=batch_size)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for epoch_idx in range(epochs):
    print(device)
    print(f"EPOCH: {epoch_idx}")

    pointnet = PointNet()
    pointnet.to(device)
    # map_location lets GPU-trained checkpoints load on a CPU-only machine.
    pointnet.load_state_dict(
        torch.load(f"./model_weights/PointNet_{epoch_idx}.pth", map_location=device)
    )
    pointnet.eval()

    all_preds = []
    all_labels = []
    # Inference only: no optimizer and no gradient bookkeeping are needed.
    with torch.no_grad():
        # A separate batch index keeps the epoch index from being overwritten.
        for batch_idx, data in enumerate(test_loader):
            print('Batch [%4d / %4d]' % (batch_idx + 1, len(test_loader)))

            pcd = data['pointcloud'].to(device).float()
            labels = data['category']

            outputs, _, _ = pointnet(pcd.transpose(1, 2))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.tolist())
    cm = confusion_matrix(all_labels, all_preds)
    print(cm)
    print("*"*40)

### 90/10 Class split evaluation

In [None]:
# Load the 90/10 split created in the Random Forest notebook
# Evaluate every epoch checkpoint on the 90/10 class-split test set and print
# one confusion matrix per epoch.
test_ds_90_10 = PointCloudData("./data/test_df_90_10.csv", valid=True)
test_loader_90_10 = DataLoader(dataset=test_ds_90_10, batch_size=batch_size)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for epoch_idx in range(epochs):
    print(device)
    print(f"EPOCH: {epoch_idx}")

    pointnet = PointNet()
    pointnet.to(device)
    # map_location lets GPU-trained checkpoints load on a CPU-only machine.
    pointnet.load_state_dict(
        torch.load(f"./model_weights/PointNet_{epoch_idx}.pth", map_location=device)
    )
    pointnet.eval()

    all_preds_90_10 = []
    all_labels_90_10 = []
    # Inference only: no optimizer and no gradient bookkeeping are needed.
    with torch.no_grad():
        # A separate batch index keeps the epoch index from being overwritten.
        for batch_idx, data in enumerate(test_loader_90_10):
            print('Batch [%4d / %4d]' % (batch_idx + 1, len(test_loader_90_10)))

            pcd = data['pointcloud'].to(device).float()
            labels = data['category']

            outputs, _, _ = pointnet(pcd.transpose(1, 2))
            _, preds = torch.max(outputs, 1)
            all_preds_90_10.extend(preds.cpu().tolist())
            all_labels_90_10.extend(labels.tolist())

    cm_90_10 = confusion_matrix(all_labels_90_10, all_preds_90_10)
    print(cm_90_10)
    print("*"*40)

# POINT NET + RandomForestSampling

Here we use the same model with a different data loader to train on Random Forest created samples instead of
Iterative Farthest point samples

In [None]:
batch_size = 32  # samples per DataLoader batch
learning_rate = 0.0001  # Adam learning rate; train() also forwards it as `alpha` to pointnetloss
sample_rate = 1024  # points drawn per cloud by create_rf_point_samples
epochs = 10  # training epochs for the RF-sampled model

In [None]:
def create_rf_point_samples(df, rf_point, sample_rate, name):
    '''
    Down-sample each point cloud using per-point Random Forest predictions
    as sampling weights, and write the samples to disk.

    For every CSV listed in ``df["reference_path"]`` the positive-class
    probability of each point is predicted from its ``x, y, z, dist`` columns,
    turned into sampling weights with a softmax, and ``sample_rate`` distinct
    points are drawn. Their ``x, y, z`` columns are written to
    ``./data/rf_point_samples_<name>/<row position>.csv``.

    Params:
    df (pd.DataFrame) - must contain a "reference_path" column; it is modified
        in place (a "sampled_pcds" column with the new paths is added)
    rf_point - Pretrained Random Forest propensity model exposing predict_proba
    sample_rate (int) - Number of points to select per cloud
    name (str) - train/test suffix of the output folder

    Returns:
    pd.DataFrame - the same ``df`` object, with the "sampled_pcds" column

    Raises:
    ValueError - from np.random.choice when a cloud has fewer than
        ``sample_rate`` points (sampling is without replacement)
    '''
    folder = "./data/rf_point_samples_" + name
    os.makedirs(folder, exist_ok=True)
    new_path_list = []
    for idx, path in enumerate(df["reference_path"]):
        tmp_df = pd.read_csv(path)
        # Probability of the positive class for every point of the cloud.
        preds = rf_point.predict_proba(tmp_df.loc[:, ["x", "y", "z", "dist"]].values)[:, 1]
        # Softmax makes the weights sum to 1, as required by `p`.
        sampled_inds = np.random.choice(tmp_df.shape[0], size=sample_rate,
                                        p=softmax(preds), replace=False)
        resampled_df = tmp_df.iloc[sampled_inds]
        # The former `name = pathlib.Path(path).name` line was removed: pathlib
        # is never imported (NameError) and the value was unused.
        new_path = f"{folder}/{idx}.csv"
        resampled_df.loc[:, ['x', 'y', 'z']].to_csv(new_path, index=False)

        new_path_list.append(new_path)
    df["sampled_pcds"] = new_path_list
    return df

In [None]:
# Cell needs to be run to create Random Forest samples to be used in PointNet(RF)+RandomForest notebook
# Load trained RF model for downsample point selection
# Pretrained per-point Random Forest used to weight the down-sampling.
rf_point = load("./model_weights/rf_point.joblib")

# Train split: write the sampled clouds, then persist the table that records
# their paths (create_rf_point_samples adds a `sampled_pcds` column to the
# frame it is given).
RF_sampled_df = create_rf_point_samples(
    df=train_df, rf_point=rf_point, sample_rate=sample_rate, name="train"
)
RF_sampled_df.to_csv("./data/RF_sampled_df.csv", index=False)

# Test split: same procedure, separate output folder and table.
RF_sampled_df_test = create_rf_point_samples(
    df=test_df, rf_point=rf_point, sample_rate=sample_rate, name="test"
)
RF_sampled_df_test.to_csv("./data/RF_sampled_df_test.csv", index=False)


In [None]:
class PointCloudDataRFSampler(Dataset):
    """Dataset of pre-sampled point clouds stored as CSV files.

    The index CSV must provide a ``sampled_pcds`` column (path of a CSV with
    the sampled points) and a ``label`` column holding ``"anomaly"`` or
    ``"normal"``.

    Args:
        dataframe_path: Path of the index CSV.
        valid: Accepted for API compatibility; not used by this class.
        sample_rate: Stored on the instance; not used when loading, because
            the files are read as-is.
        transform: Callable applied to the NumPy array of points, or a falsy
            value to skip preprocessing.
    """

    def __init__(self, dataframe_path, valid=False, sample_rate=1024, transform=default_transforms()):
        self.df = pd.read_csv(dataframe_path)
        # Label string -> class index
        self.classes = {"anomaly": 1, "normal": 0}
        self.sample_rate = sample_rate
        self.transforms = transform

    def __len__(self):
        return self.df.shape[0]

    def __preproc__(self, file):
        """Read the sampled cloud at ``file`` and return it as a tensor."""
        # Start from the raw values so the result is defined even when no
        # transform is configured (previously an UnboundLocalError).
        pointcloud = pd.read_csv(file).values
        # Apply transforms (only normalize, but more can be added)
        if self.transforms:
            pointcloud = self.transforms(pointcloud)
        return torch.from_numpy(pointcloud)

    def __getitem__(self, idx):
        """Return ``{'pointcloud': tensor, 'category': int}`` for row ``idx``."""
        row = self.df.iloc[idx]  # single row lookup instead of one per column
        pointcloud = self.__preproc__(row['sampled_pcds'])
        return {'pointcloud': pointcloud,
                'category': self.classes[row['label']]}



In [None]:
# Prefer the first CUDA GPU; fall back to the CPU when none is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Fresh model for the RF-sampled experiment, optimised with Adam.
pointnet = PointNet().to(device)
optimizer = torch.optim.Adam(params=pointnet.parameters(), lr=learning_rate)

In [None]:
# Training set of RF-sampled clouds, served in shuffled batches.
train_point_ds = PointCloudDataRFSampler(
    dataframe_path="./data/RF_sampled_df.csv",
    sample_rate=sample_rate,
)
train_loader = DataLoader(dataset=train_point_ds, batch_size=batch_size, shuffle=True)

In [None]:
print(len(train_point_ds))
# train() as defined in this notebook has no `save` parameter, so passing
# save=True raised TypeError; a checkpoint is written after every epoch anyway.
train(pointnet, train_loader, learning_rate, epochs=epochs, model_name="PointNet_RF_samples")

### EVAL

In [None]:
# Evaluation settings (same values as used for training).
batch_size = 32
learning_rate = 0.0001
sample_rate = 1024
epochs = 10

# Test set of RF-sampled clouds, iterated in file order (no shuffling).
test_point_ds = PointCloudDataRFSampler(
    dataframe_path="./data/RF_sampled_df_test.csv",
    sample_rate=sample_rate,
)
test_loader = DataLoader(dataset=test_point_ds, batch_size=batch_size)

In [None]:

# Evaluate every epoch checkpoint of the RF-sampled model on the RF-sampled
# test set and print one confusion matrix per epoch.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for epoch_idx in range(epochs):
    print(device)

    pointnet = PointNet()
    pointnet.to(device)
    # Load the weights trained in this section ("PointNet_RF_samples"); the
    # previous code loaded the plain "PointNet" checkpoints here.
    # NOTE(review): confirm the cross-evaluation was not intentional.
    # map_location lets GPU-trained checkpoints load on a CPU-only machine.
    pointnet.load_state_dict(
        torch.load(f"./model_weights/PointNet_RF_samples_{epoch_idx}.pth", map_location=device)
    )

    pointnet.eval()
    all_preds = []
    all_labels = []
    print(f"EPOCH: {epoch_idx}")
    # Inference only: no optimizer and no gradient bookkeeping are needed.
    with torch.no_grad():
        # A separate batch index keeps the epoch index from being overwritten.
        for batch_idx, data in enumerate(test_loader):
            print('Batch [%4d / %4d]' % (batch_idx + 1, len(test_loader)))

            pcd = data['pointcloud'].to(device).float()
            labels = data['category']

            outputs, _, _ = pointnet(pcd.transpose(1, 2))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.tolist())
    cm = confusion_matrix(all_labels, all_preds)
    print(cm)
    print("="*40)

In [None]:
# Build the 90/10 test subset of the RF-sampled clouds: keep the rows of the
# full RF-sampled test table whose object also appears in the 90/10 split.
test_90_10 = pd.read_csv("./data/test_df_90_10.csv")
test_df = pd.read_csv("./data/RF_sampled_df_test.csv")

# Vectorised membership test; the previous loop rebuilt the full path list
# for every row (quadratic in the number of rows).
in_90_10 = test_df["object_path"].isin(test_90_10["object_path"])
inds = test_df.index[in_90_10].tolist()

# Number of matched rows (a bare `len(inds)` mid-cell displayed nothing).
print(len(inds))

# Save the matched test subset
test_df.loc[in_90_10].to_csv("./data/RF_sampled_df_test_90_10.csv", index=False)

test_point_ds_90_10 = PointCloudDataRFSampler("./data/RF_sampled_df_test_90_10.csv", sample_rate=sample_rate)
test_loader_90_10 = DataLoader(test_point_ds_90_10, batch_size=batch_size)

# Evaluate every epoch checkpoint of the RF-sampled model on the 90/10
# RF-sampled test subset and print one confusion matrix per epoch.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for epoch_idx in range(epochs):
    print(device)

    pointnet = PointNet()
    pointnet.to(device)
    # Checkpoints are read from ./model_weights/, where train() writes them;
    # the previous hard-coded /content/ path does not match that location.
    # map_location lets GPU-trained checkpoints load on a CPU-only machine.
    pointnet.load_state_dict(
        torch.load(f"./model_weights/PointNet_RF_samples_{epoch_idx}.pth", map_location=device)
    )

    pointnet.eval()
    all_preds = []
    all_labels = []
    print(f"EPOCH: {epoch_idx}")
    # Inference only: no optimizer and no gradient bookkeeping are needed.
    with torch.no_grad():
        # A separate batch index keeps the epoch index from being overwritten.
        for batch_idx, data in enumerate(test_loader_90_10):
            print('Batch [%4d / %4d]' % (batch_idx + 1, len(test_loader_90_10)))

            pcd = data['pointcloud'].to(device).float()
            labels = data['category']

            outputs, _, _ = pointnet(pcd.transpose(1, 2))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.tolist())
    cm = confusion_matrix(all_labels, all_preds)
    print(cm)
    print("="*40)