In [None]:
import torch

# Report which accelerator this runtime exposes. The query is guarded because
# torch.cuda.current_device() raises on CPU-only runtimes, while the rest of
# the notebook explicitly supports a CPU fallback when selecting `device`.
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
else:
    gpu_name = "CPU only (no CUDA device visible)"
gpu_name

# Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive. Later cells copy the competition
# archives and a model checkpoint from beneath this mount point.
# NOTE(review): google.colab is presumably only importable on a Colab runtime;
# running the notebook elsewhere would need a different data source.
from google.colab import drive
drive.mount('/content/drive')

# Copy Files

In [None]:
import os
import shutil
import zipfile

# Drive folder that holds the competition archives (needs the Drive mount).
DRIVE_DIR = '/content/drive/My Drive/Alzheimer Competition'
LOCAL_DATA_DIR = './data'


def _fetch_and_extract(archive_name, extract_to):
    """Copy one archive from Drive to ./data, unpack it, then delete the local zip.

    The archive is copied to local disk before extraction, as the original
    cell did. Extraction uses ``zipfile`` instead of ``!unzip`` so that
    re-running the cell silently overwrites existing files rather than
    stopping at an interactive "replace?" prompt.

    Args:
        archive_name: File name of the zip inside ``DRIVE_DIR``.
        extract_to: Directory the archive contents are extracted into.
    """
    local_zip = os.path.join(LOCAL_DATA_DIR, archive_name)
    shutil.copyfile(os.path.join(DRIVE_DIR, archive_name), local_zip)
    try:
        with zipfile.ZipFile(local_zip) as archive:
            archive.extractall(extract_to)
    finally:
        # Always reclaim the disk space, even if extraction failed.
        os.remove(local_zip)


os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

# Extracting straight into ./data yields ./data/point_cloud without the
# separate shutil.move step, which failed or nested on a second run.
# NOTE(review): assumes the zip has a top-level `point_cloud/` folder, as the
# original move of './point_cloud' implies -- confirm against the archive.
_fetch_and_extract('point_cloud.zip', LOCAL_DATA_DIR)

# The split lists stay in the working directory; later cells read them from
# the relative path "traintestlist/".
_fetch_and_extract('traintestlist.zip', '.')

# Import Libraries

In [None]:
# Standard library
import csv
import gc
import glob
import itertools
import os
import random
import shutil

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef as mcc
from tqdm import tqdm

# PyTorch libraries and modules
import torch
import torch.nn as nn
import torch.utils.data
from torch.autograd import Variable
from torchvision.models.video import r3d_18

# Seed every RNG the notebook can draw from, not just torch: the balanced
# sampler oversamples with `random.choice`, so seeding torch alone leaves the
# training set composition different on every run.
SEED = 100
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Best validation MCC seen so far. validation() writes a checkpoint only when
# an epoch beats this value; -1 is the lowest value MCC can take.
MCC_SCORE = -1

# Dataset

### Save point cloud as voxel tensors

In [None]:
import h5py
import os
import pickle

class VoxelTensor:
    """Convert triple point clouds stored in HDF5 files into voxel tensors on disk.

    Every sample is read from ``<src_path><name>.h5``, which must contain the
    datasets ``cloud1``, ``cloud2`` and ``cloud3``. Each cloud becomes one
    channel of a ``(3, depth, height, width)`` occupancy grid.
    """

    def __init__(self, src_path):
        self.data = ""        # unused; kept so existing attribute access keeps working
        self.path = src_path  # prefix prepended to "<name>.h5" by plain string concatenation

    def pc2voxel(self, cloud0, cloud1, cloud2, depth=16, height=32, width=32, rotation=0):
        """Rasterise three point clouds into a 3-channel binary voxel grid.

        The inputs are never modified. (The previous implementation rescaled
        and rotated the caller's arrays in place, so calling it once per
        rotation on the same clouds compounded the transforms.)

        Args:
            cloud0, cloud1, cloud2: Arrays whose first three columns are used
                as (depth, height, width) indices.
                NOTE(review): assumed to be non-negative integer coordinates
                of shape (N, >=3) -- confirm against the .h5 producer.
            depth, height, width: Size of the output grid along each axis.
            rotation: 0 = none; 1 and 3 swap the height/width columns and
                reflect one of them; 2 reflects both columns.

        Returns:
            ``np.float16`` array of shape ``(3, depth, height, width)`` with
            1.0 at every occupied voxel.

        Raises:
            IndexError: If a transformed coordinate falls outside the grid,
                which can happen for rotation 1 or 3 when the occupied
                height/width extents do not fit the swapped axes.
        """
        # astype() copies, so everything below works on private arrays.
        clouds = [np.asarray(cloud)[:, :3].astype(np.int64)
                  for cloud in (cloud0, cloud1, cloud2)]

        def axis_extent(axis):
            # Largest coordinate along `axis` over all non-empty clouds.
            return max((int(c[:, axis].max()) for c in clouds if len(c)), default=0)

        extents = []
        for axis, size in enumerate((depth, height, width)):
            extent = axis_extent(axis)
            if extent >= size:
                # Shrink so that the largest coordinate still lands inside the grid.
                ratio = size / (extent + 1)
                for c in clouds:
                    c[:, axis] = (c[:, axis].astype(float) * ratio).astype(np.int64)
                # Reflections below must use the extent *after* rescaling;
                # using the raw extent produced indices far outside the grid.
                extent = axis_extent(axis)
            extents.append(extent)
        _, max_y, max_x = extents

        if rotation == 1:
            for c in clouds:
                y = c[:, 1].copy()  # copy: a view would be clobbered by the next line
                c[:, 1] = c[:, 2]
                c[:, 2] = max_y - y
        elif rotation == 2:
            for c in clouds:
                c[:, 1] = max_y - c[:, 1]
                c[:, 2] = max_x - c[:, 2]
        elif rotation == 3:
            for c in clouds:
                x = c[:, 2].copy()  # copy for the same aliasing reason as above
                c[:, 2] = c[:, 1]
                c[:, 1] = max_x - x

        voxel_grid = np.zeros((3, depth, height, width), dtype=np.float16)
        for channel, c in enumerate(clouds):
            voxel_grid[channel, c[:, 0], c[:, 1], c[:, 2]] = 1.0

        return voxel_grid

    def save(self, files_list, dst_path, dim, augment=False):
        """Voxelise every listed sample and store it as a ``.pt`` tensor.

        Args:
            files_list: Mapping ``"<name>.mp4" -> label``.
            dst_path: Output directory prefix (created if missing); file names
                are appended by string concatenation.
            dim: ``[depth, height, width]`` of the voxel grid.
            augment: When true, each sample is written four times
                (rotations 0-3) as ``<name><rotation>.pt``.

        Returns:
            ``files_list`` itself when not augmenting; otherwise a new dict
            ``"<name><rotation>.mp4" -> label`` with one entry per written file.
        """
        depth, height, width = dim[0], dim[1], dim[2]
        files_list_aug = {}

        os.makedirs(dst_path, exist_ok=True)

        for ID in tqdm(list(files_list.keys())):
            original_name = ID.replace(".mp4", "")

            # Context manager closes the file even if a dataset is missing.
            with h5py.File(self.path + original_name + ".h5", 'r') as hf:
                c1 = hf['cloud1'][:]
                c2 = hf['cloud2'][:]
                c3 = hf['cloud3'][:]

            y = files_list[ID]

            if not augment:
                X = self.pc2voxel(c1, c2, c3, depth=depth, height=height, width=width)
                torch.save(torch.from_numpy(X).float(), dst_path + original_name + ".pt")
                continue

            for rotation in range(4):
                X = self.pc2voxel(c1, c2, c3, depth=depth, height=height, width=width,
                                  rotation=rotation)
                files_list_aug[original_name + str(rotation) + ".mp4"] = y
                torch.save(torch.from_numpy(X).float(),
                           dst_path + original_name + str(rotation) + ".pt")

        return files_list_aug if augment else files_list
        


### Custom Dataset Class

In [None]:
import torch

class VoxelDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves pre-computed voxel tensors from disk.

    Args:
        files_list: Mapping ``"<name>.mp4" -> label``; the label must be
            convertible with ``int()``.
        source_path: Prefix prepended to ``"<name>.pt"`` by string
            concatenation to locate each tensor file.
    """

    def __init__(self, files_list, source_path):
        self.path = source_path
        self.labels = files_list
        self.list_IDs = [sample_id for sample_id in files_list.keys()]

    def __len__(self):
        """Number of samples in the listing."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return ``(tensor loaded from <name>.pt, label as a 0-dim integer tensor)``."""
        sample_id = self.list_IDs[index]
        stem = sample_id.replace(".mp4", "")
        voxels = torch.load(self.path + stem + ".pt")
        label = torch.tensor(int(self.labels[sample_id]))
        return voxels, label

### Balanced Batch Sampler

In [None]:
import torchvision
import torch.utils.data
import random


class BalancedBatchSampler(torch.utils.data.sampler.Sampler):
    """Sampler that yields indices class by class in round-robin order.

    Minority classes are oversampled (with replacement, via ``random.choice``)
    until every class has as many indices as the largest one, so each pass
    yields ``balanced_max * number_of_classes`` indices.

    Fixes over the previous version:
      * Tensor labels are converted to Python scalars before being used as
        dict keys. ``torch.Tensor`` hashes by identity, so every sample used
        to become its own "class" and nothing was balanced.
      * Iteration no longer consumes the index lists, so the sampler can be
        reused for any number of epochs.
    """

    def __init__(self, dataset):
        # label -> list of dataset indices carrying that label
        self.dataset = {}
        for idx in range(len(dataset)):
            label = self._get_label(dataset, idx)
            self.dataset.setdefault(label, []).append(idx)

        self.balanced_max = max((len(ix) for ix in self.dataset.values()), default=0)

        # Oversample the classes with fewer elements than the max.
        for indices in self.dataset.values():
            while len(indices) < self.balanced_max:
                indices.append(random.choice(indices))

        self.keys = list(self.dataset.keys())
        self.currentkey = 0  # no longer used by __iter__; kept for compatibility

    def __iter__(self):
        # All lists have length balanced_max after oversampling. Walking them
        # back-to-front reproduces the order of the former pop()-based loop
        # without destroying the lists.
        for position in range(self.balanced_max - 1, -1, -1):
            for label in self.keys:
                yield self.dataset[label][position]

    def _get_label(self, dataset, idx):
        """Return a hashable class label for ``dataset[idx]``."""
        # torchvision is only needed for the two shortcut paths below, which
        # read labels without loading the sample itself.
        try:
            import torchvision
        except ImportError:
            torchvision = None

        if torchvision is not None:
            dataset_type = type(dataset)
            if dataset_type is torchvision.datasets.MNIST:
                # `targets` replaces the deprecated `train_labels` alias.
                return int(dataset.targets[idx])
            if dataset_type is torchvision.datasets.ImageFolder:
                return dataset.imgs[idx][1]

        _, target = dataset[idx]
        if isinstance(target, torch.Tensor) and target.numel() == 1:
            target = target.item()  # value-hashable key instead of identity-hashed tensor
        return target

    def __len__(self):
        return self.balanced_max * len(self.keys)

### Point cloud files, train list, test list, generate tensors

In [None]:
fold_name = "fold_0"    # specify which train-test split to work with

depth, height, width = 32, 64, 64   # dimension for converting point cloud to voxels
dimension = [depth, height, width]

# Point cloud files
pc_path = "./data/point_cloud/"
files = glob.glob(pc_path + "*.h5", recursive=True)

# Train test list and labels
train_labels_file = "traintestlist/" + fold_name + "_train.csv"
test_labels_file = "traintestlist/" + fold_name + "_test.csv"

# Output directories for the generated voxel tensors
train_files_directory = "./dataset/train/"
test_files_directory = "./dataset/test/"


def _read_label_csv(csv_path):
    """Read a label CSV into ``{first column: second column}``.

    Rows with fewer than two columns (for example blank lines) are skipped
    instead of raising IndexError. A header row keyed ``"filename"`` is
    dropped if present.
    """
    # newline='' is what the csv module documentation asks for on open().
    with open(csv_path, mode='r', newline='') as infile:
        labels = {row[0]: row[1] for row in csv.reader(infile) if len(row) >= 2}
    labels.pop("filename", None)
    return labels


train_list = _read_label_csv(train_labels_file)
test_list = _read_label_csv(test_labels_file)


In [None]:
# One converter serves both splits. With augment=True, save() returns a new
# label dict with one key per rotated copy, and that dict replaces train_list.
# NOTE(review): because train_list is overwritten with the augmented keys,
# this cell cannot be run twice without first re-running the cell that reads
# the label CSVs.
voxelizer = VoxelTensor(pc_path)
train_list = voxelizer.save(
    files_list=train_list,
    dst_path=train_files_directory,
    dim=dimension,
    augment=True,
)
test_list = voxelizer.save(
    files_list=test_list,
    dst_path=test_files_directory,
    dim=dimension,
)

train_len = len(train_list)
test_len = len(test_list)
print("Train length: " + str(train_len) + " Test Length: " + str(test_len))

### Create Train and Test Datasets

In [None]:
batch_size = 64

# Training split: indices come from BalancedBatchSampler rather than from
# DataLoader's own shuffling (the plain alternative would be shuffle=True
# without a sampler).
train = VoxelDataset(files_list=train_list, source_path=train_files_directory)
train_loader = torch.utils.data.DataLoader(
    train,
    sampler=BalancedBatchSampler(train),
    batch_size=batch_size,
    num_workers=4,
)

# Test split: fixed order.
test = VoxelDataset(files_list=test_list, source_path=test_files_directory)
test_loader = torch.utils.data.DataLoader(
    test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

print("Number of Batches: " + str(len(train_loader)))

# Model

In [None]:
# Local file name under which the Drive checkpoint is stored; the
# commented-out load_state_dict call in the model cell reads it.
checkpoint_model = "weight_3D.pth"
shutil.copyfile("/content/drive/My Drive/Alzheimer Competition/3D_pointcloud_SimpleCNN_10for_cloud_size_32_64_64_acc_79.534_mcc_0.465.pth", checkpoint_model)

In [None]:
from torchvision.models.video import r3d_18
#from torchvision.models.video import r2plus1d_18
#from torchvision.models.video import mc3_18

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = r3d_18(pretrained = True)

# Swap the pretrained classification head for a fresh 2-class layer.
# Assigning `model.fc.out_features = 2` (the previous code) only changes an
# attribute: the weight matrix keeps its original shape, so the network kept
# emitting one logit per pretrained class (400 for the Kinetics-400 weights,
# per the torchvision docs) instead of 2.
model.fc = nn.Linear(model.fc.in_features, 2)

# Move to the device after replacing the head so the new layer is moved too.
model = model.to(device)

# NOTE(review): a state dict saved from the unmodified head has a different
# fc shape and will not load into this model as-is.
# model.load_state_dict(torch.load(checkpoint_model))

In [None]:
# Model hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loss: expects raw logits and integer class labels.
error = nn.CrossEntropyLoss().to(device)
#error = nn.BCEWithLogitsLoss().to(device)

num_epochs = 100

# Optimizer: Adam with L2 regularization via weight_decay.
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

# The epoch count was formerly derived from n_iters and then immediately
# overwritten with num_epochs, so the derived value never took effect (and the
# division failed when train_len was 0). Only the effective assignment is kept.
# NOTE(review): if deriving epochs from n_iters was the intent, assign it to
# num_epochs instead -- the training loop reads num_epochs.
n_iters = 1000   # unused; kept so existing references keep working
num_epochss = int(num_epochs)
print("Total Epochs: " + str(num_epochss))
print("Train Instances: " + str(train_len))
print("Batch Size: " + str(batch_size))

# Visualization and Saving Output

In [None]:
def conf_mat(cf, epoch, acc, mcc, y_true, train_loss, train_acc, test_loss, test_acc):
    """Save the confusion-matrix image and the loss/accuracy curves for one epoch.

    Writes ``confusion_matrix/epoch<E>_accuracy<A>_mcc_<M>.png`` and a file of
    the same name under ``loss_acc_curve/``. Plotting is best effort: a failure
    is reported with the real error and never interrupts training.

    Args:
        cf: Square confusion-matrix array of integer counts.
        epoch: Epoch number, used in the file names.
        acc: Test accuracy; a 0-dim tensor, NumPy scalar or Python number.
        mcc: Matthews correlation coefficient, same accepted types as ``acc``.
            (The parameter shadows the imported ``mcc`` function locally.)
        y_true: Ground-truth labels; only the number of distinct values is
            used, to place the axis ticks.
        train_loss, train_acc, test_loss, test_acc: Per-epoch histories whose
            elements may be tensors or numbers.
    """
    def as_floats(series):
        # float() works for CPU/CUDA 0-dim tensors, NumPy scalars and numbers;
        # matplotlib cannot plot CUDA tensors directly.
        return [float(value) for value in series]

    try:
        # float() instead of .item(): plain Python floats have no .item(),
        # which used to abort plotting with a misleading "MCC is 0" message.
        file_name = f"epoch{epoch}_accuracy{round(float(acc),3)}_mcc_{round(float(mcc),3)}.png"

        plt.imshow(cf,cmap=plt.cm.RdYlGn,interpolation='nearest')
        plt.colorbar()
        plt.title('Confusion Matrix without Normalization')
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        tick_marks = np.arange(len(set(y_true))) # length of classes
        class_labels = ['Non Stalled','Stalled']
        plt.xticks(tick_marks,class_labels)
        plt.yticks(tick_marks,class_labels)
        # Cell counts: white text on dark cells, black on light ones.
        thresh = cf.max() / 2.
        for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):
            plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')
        os.makedirs("confusion_matrix", exist_ok=True)
        plt.savefig("confusion_matrix/" + file_name)
        plt.close('all')

        os.makedirs("loss_acc_curve", exist_ok=True)

        # Four stacked panels sharing the epoch axis.
        fig1, axs = plt.subplots(4,sharex=True,constrained_layout=True)
        axs[0].plot(as_floats(train_loss), color = "red")
        axs[0].set_title("Train loss")
        axs[1].plot(as_floats(train_acc), color = "blue")
        axs[1].set_title("Train Accuracy")
        axs[2].plot(as_floats(test_loss), color = "green")
        axs[2].set_title("Test Loss")
        axs[3].plot(as_floats(test_acc))
        axs[3].set_title("Test Accuracy")
        fig1.savefig("loss_acc_curve/" + file_name)
    except Exception as exc:
        # Keep training alive, but say what actually went wrong.
        print(f"Could not save plots for epoch {epoch}: {exc!r}")
    finally:
        # Release figures on the error path too, so they do not pile up.
        plt.close('all')

In [None]:
#for cross entropy loss
def validation(epoch,train_loss, train_acc, test_loss, test_acc):
    """Evaluate the global ``model`` on ``test_loader`` and record the metrics.

    Reads the notebook globals ``model``, ``test_loader``, ``error``,
    ``device``, ``depth``, ``height`` and ``width``; updates ``MCC_SCORE``
    and writes a checkpoint whenever the MCC improves.

    Args:
        epoch: Current epoch, used in messages and file names.
        train_loss, train_acc: Training histories, forwarded to ``conf_mat``.
        test_loss, test_acc: Test histories; this epoch's values are appended
            as Python floats.

    Returns:
        ``(test_loss, test_acc)`` -- the same list objects, extended by one entry.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    global MCC_SCORE

    correct = 0
    total = 0
    loss_sum = 0.0
    true_batches = []
    pred_batches = []

    model.eval()
    try:
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.view(-1,3,depth,height,width).to(device)
                labels = labels.to(device)

                outputs = model(images)
                batch_len = len(labels)
                # `error` returns the batch mean; weight it by the batch size so
                # that dividing by `total` gives a true per-sample mean. (Dividing
                # a sum of batch means by the sample count under-reported the loss.)
                loss_sum += error(outputs, labels).item() * batch_len
                predicted = outputs.argmax(dim=1)
                correct += (predicted == labels).sum().item()
                total += batch_len

                true_batches.append(labels.cpu().numpy())
                pred_batches.append(predicted.cpu().numpy())
    finally:
        # Restore training mode even if evaluation fails midway.
        model.train()

    if total == 0:
        raise ValueError("test_loader yielded no samples; nothing to validate")

    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)

    # Plain floats keep the history lists plottable on GPU runtimes.
    loss = loss_sum / total
    accuracy = 100.0 * correct / total
    test_loss.append(loss)
    test_acc.append(accuracy)

    mcc_score = float(mcc(y_true, y_pred))
    print(f"MCC score is {round(mcc_score,4)}")

    if MCC_SCORE < mcc_score:
        MCC_SCORE = mcc_score
        os.makedirs("model_checkpoints", exist_ok=True)
        try:
            torch.save(model.state_dict(), f"model_checkpoints/3D_pointcloud_SimpleCNN_{epoch}for_cloud_size_32_64_64_acc_{round(accuracy,3)}_mcc_{round(mcc_score,3)}.pth")
        except Exception as exc:
            # Still best effort, but no longer silent: a lost best checkpoint
            # must be visible in the log.
            print(f"WARNING: could not save checkpoint for epoch {epoch}: {exc!r}")

    cf = confusion_matrix(y_true, y_pred)
    # NumPy scalars support both .item() and float(), so conf_mat can read
    # them whichever conversion it uses.
    conf_mat(cf, epoch, np.float64(accuracy), np.float64(mcc_score), y_true, train_loss, train_acc, test_loss, test_acc)

    print('TESTING ---> Epoch: {} Loss: {} Accuracy: {} %'.format(epoch, round(loss,3), round(accuracy,3)))

    return test_loss, test_acc


# Training

In [None]:
### TRAINING code
import gc

# Per-epoch histories; validation() appends to the test_* lists.
train_loss = []
test_loss = []
iteration_list = []
train_acc = []
test_acc = []

for epoch in tqdm(range(num_epochs)):
    accuracy_list = []
    loss_list = []
    for images, labels in train_loader:
        # Plain tensors suffice: Variable is a deprecated no-op wrapper, and
        # requesting gradients for the input batch only cost extra work.
        images = images.view(-1,3,depth,height,width).to(device)
        labels = labels.to(device)

        # Clear gradients
        optimizer.zero_grad()
        # Forward propagation
        outputs = model(images)
        # Cross-entropy loss (softmax is applied inside the loss)
        loss = error(outputs, labels)
        # Calculating gradients
        loss.backward()
        # Update parameters
        optimizer.step()

        predicted = outputs.detach().argmax(dim=1)
        accuracy = 100.0 * (predicted == labels).sum().item() / len(labels)

        # Python lists avoid re-allocating an array on every batch.
        accuracy_list.append(accuracy)
        loss_list.append(loss.item())

    final_loss = np.mean(loss_list)
    final_acc = np.mean(accuracy_list)
    print(f"TRAINING ---> Epoch: {epoch} Loss: {round(final_loss,4)} Accuracy: {round(final_acc,4)}")
    train_loss.append(final_loss)
    train_acc.append(final_acc)

    # Drop references to the last batch's tensors before validation runs.
    del images, labels, outputs, loss, predicted
    gc.collect()
    test_loss , test_acc = validation(epoch, train_loss, train_acc, test_loss, test_acc)

    # Rebuild the loader for the next epoch.
    # NOTE(review): this switches from the balanced sampler used for the first
    # epoch to uniform shuffling for all later epochs -- confirm that is intended.
    train = VoxelDataset(files_list=train_list, source_path=train_files_directory)
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=4)
    #train_loader = torch.utils.data.DataLoader(train,sampler=BalancedBatchSampler(train), batch_size = batch_size, num_workers=4)
  

In [None]:
# Summary of the finished run.
# The previous version of this cell read y_true / y_pred (locals of
# validation()) and accuracy_list (deleted by the training loop), so it raised
# NameError on a fresh Restart & Run All. It now reports values that exist at
# module level after training.
mcc_score = MCC_SCORE   # best validation MCC across all epochs
print(f"Best validation MCC: {mcc_score}")
print(f"Final-epoch training accuracy: {round(final_acc,4)}")

In [None]:
import matplotlib.pyplot as plt

# train_loss and train_acc receive one value per epoch from the training loop,
# so the x-axis counts epochs (it was previously labelled as iterations).

# Training loss curve
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(train_loss)
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.set_title("CNN: Loss vs Epoch")
plt.show()

# Training accuracy curve
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(train_acc, color = "red")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_title("CNN: Accuracy vs Epoch")
plt.show()