# Mount Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the copy
# cells in this notebook read their inputs from "/content/drive/My Drive/...".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Copy Files

In [None]:
import shutil
import os

os.makedirs('traintestlist', exist_ok=True)

## For pt_cloud files
shutil.copyfile("/content/drive/My Drive/AlzheimerStallCatcher3DConvPointCloud/test_cloud.zip" , "test_cloud.zip")
shutil.copyfile("/content/drive/My Drive/AlzheimerStallCatcher3DConvPointCloud/submission_format.csv", "./traintestlist/submission_format.csv")

!unzip test_cloud.zip;

os.remove("test_cloud.zip")

path = "./test_cloud/"


submission_format_csv = "submission_format.csv"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: test_cloud/260768.h5    
  inflating: test_cloud/260770.h5    
  inflating: test_cloud/260771.h5    
  inflating: test_cloud/260810.h5    
  inflating: test_cloud/260945.h5    
  inflating: test_cloud/260947.h5    
  inflating: test_cloud/260963.h5    
  inflating: test_cloud/261040.h5    
  inflating: test_cloud/261230.h5    
  inflating: test_cloud/261259.h5    
  inflating: test_cloud/261278.h5    
  inflating: test_cloud/261331.h5    
  inflating: test_cloud/261366.h5    
  inflating: test_cloud/261383.h5    
  inflating: test_cloud/177126.h5    
  inflating: test_cloud/177151.h5    
  inflating: test_cloud/177188.h5    
  inflating: test_cloud/177260.h5    
  inflating: test_cloud/177274.h5    
  inflating: test_cloud/177344.h5    
  inflating: test_cloud/177356.h5    
  inflating: test_cloud/177439.h5    
  inflating: test_cloud/177500.h5    
  inflating: test_cloud/177532.h5    
  inflating: test_cloud

In [None]:
## For image files
shutil.copyfile("/content/drive/My Drive/AlzheimerStallCatcher3DConvPointCloud/test_frames_gray.zip" , "test_gray.zip")
!unzip test_gray.zip;
os.remove("test_gray.zip")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: test_frames_gray/683999/48.jpg  
  inflating: test_frames_gray/683999/49.jpg  
  inflating: test_frames_gray/683999/5.jpg  
  inflating: test_frames_gray/683999/50.jpg  
  inflating: test_frames_gray/683999/51.jpg  
  inflating: test_frames_gray/683999/52.jpg  
  inflating: test_frames_gray/683999/53.jpg  
  inflating: test_frames_gray/683999/54.jpg  
  inflating: test_frames_gray/683999/6.jpg  
  inflating: test_frames_gray/683999/7.jpg  
  inflating: test_frames_gray/683999/8.jpg  
  inflating: test_frames_gray/683999/9.jpg  
   creating: test_frames_gray/684029/
  inflating: test_frames_gray/684029/0.jpg  
  inflating: test_frames_gray/684029/1.jpg  
  inflating: test_frames_gray/684029/10.jpg  
  inflating: test_frames_gray/684029/11.jpg  
  inflating: test_frames_gray/684029/12.jpg  
  inflating: test_frames_gray/684029/13.jpg  
  inflating: test_frames_gray/684029/14.jpg  
  inflating: test_frames_gray/

In [None]:
## Copy the trained multimodal checkpoint from Drive to the local file "weight_3D.pth".
## shutil.copyfile returns the destination path, which becomes this cell's output.
import shutil
shutil.copyfile("/content/drive/My Drive/AlzheimerStallCatcher3DConvPointCloud/Multimodal_catcrosent_epoch_34_acc_84.068_mcc_0.60754.pth", "weight_3D.pth")

'weight_3D.pth'

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import glob
import gc

# PyTorch libraries and modules
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.utils.data

torch.manual_seed(0)

import csv

import itertools
import datetime

In [None]:
# Shared configuration
# Number of samples per batch; passed to both test DataLoaders.
batch_size = 76

# Fold index handed to the dataset classes. They only use it to build the
# "fold_<n>_train.csv" file name when training=True.
split_number = 0

# **Dataset for PointCloud-Voxel**

### Voxel Dataset Class

In [None]:
import h5py
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class VoxelDataset(Dataset):
    """Dataset that reads three point clouds per sample from HDF5 and voxelizes them.

    Each sample is stored as ``<dataset_path>/<name>.h5`` holding the datasets
    ``cloud1``, ``cloud2`` and ``cloud3``. The three clouds are rasterized into
    a binary occupancy grid of shape ``(3, depth, height, width)``.

    Args:
        dataset_path: Directory containing the ``.h5`` files.
        split_path: Directory containing the split CSV files.
        split_number: Fold index; only used to pick the CSV when ``training`` is True.
        input_shape: ``(depth, height, width)`` of the voxel grid.
        training: If True read ``fold_<split_number>_train.csv``, otherwise
            ``submission_format.csv``.

    Note:
        ``self.labels`` holds the ``filename`` column of the CSV, so
        ``__getitem__`` returns the file name as its second element.
    """

    def __init__(self, dataset_path, split_path, split_number, input_shape, training):
        self.training = training
        # Paths (without extension) of every sample, plus the CSV file names.
        self.sequences, self.labels = self._extract_sequence_paths_and_filename(
            dataset_path, split_path, split_number, training
        )
        self.label_names = ["Non-stalled", "Stalled"]  # class names
        self.num_classes = len(self.label_names)
        self.input_shape = input_shape

    def _extract_sequence_paths_and_filename(
        self, dataset_path, split_path="traintestlist", split_number=0, training=True
    ):
        """Return ``(sample_paths, file_names)`` for the selected split CSV.

        ``sample_paths`` are ``dataset_path/<name>`` with the ``.mp4`` suffix
        stripped and forward slashes; ``file_names`` is the raw ``filename``
        column of the CSV.
        """
        fn = f"fold_{split_number}_train.csv" if training else "submission_format.csv"
        split_path = os.path.join(split_path, fn)
        df = pd.read_csv(split_path)
        file_name = df['filename'].values
        sequence_paths = [
            os.path.join(dataset_path, video_name.split(".mp4")[0]).replace('\\', '/')
            for video_name in file_name
        ]
        return sequence_paths, file_name

    def pc2voxel(self, cloud0, cloud1, cloud2, depth=32, height=64, width=64):
        """Rasterize three point clouds into one binary occupancy grid.

        Column 0/1/2 of each cloud is used as the depth/height/width index.
        For every axis, if the largest coordinate found across the three
        clouds does not fit into the grid, that axis is scaled down by
        ``size / (max + 1)`` for all three clouds, so they stay aligned.

        The input arrays are not modified. Empty clouds are allowed and leave
        their channel all zeros.

        Args:
            cloud0, cloud1, cloud2: Arrays of shape ``(N, >=3)`` with
                non-negative coordinates.
            depth, height, width: Size of the output grid.

        Returns:
            ``np.ndarray`` of shape ``(3, depth, height, width)``, dtype float16,
            with 1.0 at every occupied voxel.
        """
        voxel_grid = np.zeros((3, depth, height, width), dtype=np.float16)

        clouds = []
        for cloud in (cloud0, cloud1, cloud2):
            arr = np.array(cloud)  # copy: never write back into the caller's data
            if arr.size == 0:
                arr = arr.reshape(0, 3)  # normalise any empty shape to (0, 3)
            clouds.append(arr)

        non_empty = [arr for arr in clouds if arr.shape[0] > 0]
        if not non_empty:
            return voxel_grid

        indices = [np.empty((arr.shape[0], 3), dtype=np.int64) for arr in clouds]
        for axis, size in enumerate((depth, height, width)):
            # float() avoids integer wrap-around in "extent + 1" for small
            # unsigned dtypes (e.g. uint8 coordinates with max 255).
            extent = float(max(np.max(arr[:, axis]) for arr in non_empty))
            rescale = extent >= size
            ratio = size / (extent + 1) if rescale else 1.0
            for arr, idx in zip(clouds, indices):
                column = arr[:, axis]
                if rescale:
                    column = np.uint32(column.astype(float) * ratio)
                idx[:, axis] = column

        for channel, idx in enumerate(indices):
            voxel_grid[channel, idx[:, 0], idx[:, 1], idx[:, 2]] = 1.0

        return voxel_grid

    def get_cloud(self, filename):
        """Load ``cloud1..3`` from the HDF5 file and return the voxel grid as a float tensor."""
        depth = self.input_shape[0]
        height = self.input_shape[1]
        width = self.input_shape[2]

        # Context manager closes the file even if one of the reads raises.
        with h5py.File(filename, 'r') as hf:
            c1 = hf['cloud1'][:]
            c2 = hf['cloud2'][:]
            c3 = hf['cloud3'][:]

        X = self.pc2voxel(c1, c2, c3, depth=depth, height=height, width=width)
        return torch.from_numpy(X).float()

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        """Return ``(voxels, file_name)`` for sample ``index`` (wrapped modulo the dataset size)."""
        position = index % len(self)
        sequence_path = self.sequences[position]
        target = self.labels[position]

        voxels = self.get_cloud(sequence_path + ".h5")

        return voxels, target

### Create the Test Dataset and DataLoader for Point Clouds

In [None]:
import gc
import time

# Time how long it takes to build the dataset and loader objects.
start = time.time()

##change this
# Directory holding the extracted <name>.h5 point-cloud files.
dataset_path = 'test_cloud'
# Directory holding submission_format.csv.
split_path = 'traintestlist'

# Placeholder; the model-setup cell assigns the real checkpoint file name.
checkpoint_model = ''

# (depth, height, width) of the voxel grid produced by VoxelDataset.
voxel_shape = [32, 64, 64]

# Define test set
test_dataset_vox = VoxelDataset(
    dataset_path=dataset_path,
    split_path=split_path,
    split_number=split_number,
    input_shape=voxel_shape,
    training=False,
)
# shuffle=False keeps the CSV order; the testing cell zips this loader with the
# image loader and relies on both yielding samples in the same order.
test_dataloader_vox = DataLoader(test_dataset_vox, batch_size=batch_size, shuffle=False, num_workers=4)


endtime = time.time()

print("Elapsed time : " + str(endtime-start))

# As the last expression of the cell, the return value of gc.collect() is
# shown as the cell output.
gc.collect() 

Elapsed time : 0.03773069381713867


11

# **Dataset for Images**

### Image Dataset Class

In [None]:
import glob
import random
import os
import numpy as np
import torch
import pandas as pd

from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# Normalization parameters for pre-trained PyTorch models
mean = np.array([0])
std = np.array([1])


class ImageDataset(Dataset):
    """Dataset that loads a fixed-length window of frames for every video.

    Frames of a video are expected as ``<dataset_path>/<name>/<frame_no>.jpg``.
    For each sample a window of ``sequence_length`` consecutive frames is
    selected, every frame is converted to 3-channel grayscale, resized,
    converted to a tensor and normalized.

    Args:
        dataset_path: Directory containing one sub-directory of frames per video.
        split_path: Directory containing the split CSV files.
        split_number: Fold index; only used to pick the CSV when ``training`` is True.
        input_shape: ``(channels, height, width)``; the last two entries are the resize target.
        sequence_length: Number of frames returned per video.
        training: If True read ``fold_<split_number>_train.csv``, otherwise
            ``submission_format.csv``.

    Note:
        ``self.labels`` holds the ``filename`` column of the CSV, so
        ``__getitem__`` returns the file name as its second element.
    """

    def __init__(self, dataset_path, split_path, split_number, input_shape, sequence_length, training):
        self.training = training
        # Frame directories of every video, plus the CSV file names.
        self.sequences, self.labels = self._extract_sequence_paths_and_filename(
            dataset_path, split_path, split_number, training
        )
        # Number of frames taken per video.
        self.sequence_length = int(sequence_length)
        self.label_names = ["Non-stalled", "Stalled"]  # class names
        self.num_classes = len(self.label_names)
        self.input_shape = input_shape
        # grayscale (replicated to 3 channels) -> resize -> tensor -> normalize
        self.transform = transforms.Compose(
            [
                transforms.Grayscale(num_output_channels=3),
                transforms.Resize(input_shape[-2:], Image.BICUBIC),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]
        )

    def _extract_sequence_paths_and_filename(
        self, dataset_path, split_path="traintestlist", split_number=0, training=True
    ):
        """Return ``(frame_dirs, file_names)`` for the selected split CSV.

        ``frame_dirs`` are ``dataset_path/<name>`` with the ``.mp4`` suffix
        stripped and forward slashes; ``file_names`` is the raw ``filename``
        column of the CSV.
        """
        fn = f"fold_{split_number}_train.csv" if training else "submission_format.csv"
        split_path = os.path.join(split_path, fn)
        df = pd.read_csv(split_path)
        file_name = df['filename'].values
        sequence_paths = [
            os.path.join(dataset_path, video_name.split(".mp4")[0]).replace('\\', '/')
            for video_name in file_name
        ]
        return sequence_paths, file_name

    def _frame_number(self, image_path):
        """Return the integer frame number encoded in ``.../<frame_no>.jpg``.

        Raises:
            ValueError: If the file name does not start with an integer.
        """
        file_name = image_path.replace('\\', '/').split('/')[-1]
        try:
            return int(file_name.split('.jpg')[0])
        except ValueError as err:
            # Raise instead of exit(): a descriptive exception reaches the user
            # even when this runs inside a DataLoader worker process.
            raise ValueError(
                f"Cannot parse a frame number from image path {image_path!r}"
            ) from err

    def _pad_to_length(self, sequence, path):
        """Left-pad ``sequence`` in place with its first frame up to ``sequence_length``.

        Args:
            sequence: Sorted list of frame paths of one video.
            path: Frame directory of the video; only used in the error message.

        Returns:
            The same list object, now at least ``sequence_length`` long.

        Raises:
            FileNotFoundError: If ``sequence`` is empty (no frames were found).
        """
        if not sequence:
            raise FileNotFoundError(f"No .jpg frames found in {path!r}")
        missing = self.sequence_length - len(sequence)
        if missing > 0:
            sequence[:0] = [sequence[0]] * missing
        return sequence

    def __getitem__(self, index):
        """Return ``(frames, file_name)`` for sample ``index`` (wrapped modulo the dataset size).

        ``frames`` has shape ``(3, sequence_length, height, width)``.
        """
        position = index % len(self)
        sequence_path = self.sequences[position]
        target = self.labels[position]

        # Sort frames numerically (a plain string sort would put "10" before "2").
        image_paths = sorted(glob.glob(sequence_path + '/*.jpg'), key=self._frame_number)

        # Pad frames of videos shorter than `self.sequence_length` to length.
        image_paths = self._pad_to_length(image_paths, sequence_path)
        total_image = len(image_paths)

        seq_len = self.sequence_length
        half = seq_len // 2
        midpoint = total_image // 2
        if seq_len <= total_image < seq_len + half:
            # Slightly longer than needed: take the window centred on the midpoint.
            start_i = midpoint - half
        elif total_image >= seq_len + half:
            # Much longer than needed: shift the window a quarter-length later.
            start_i = midpoint - half + half // 2 - 1
        else:
            # Unreachable after padding; kept as a safe fallback.
            start_i = 0
        end_i = min(start_i + seq_len, total_image) if start_i == 0 else start_i + seq_len

        # Extract frames as tensors; close each file as soon as it is transformed.
        frames = []
        for i in range(start_i, end_i):
            with Image.open(image_paths[i]) as img:
                frames.append(self.transform(img))
        image_sequence = torch.stack(frames)  # (sequence_length, 3, H, W)

        # NOTE(review): view() reinterprets the (T, 3, H, W) memory as
        # (3, T, H, W); it does not transpose the axes the way permute(1, 0, 2, 3)
        # would. It is kept unchanged because the loaded checkpoint was presumably
        # trained with exactly this layout - confirm before changing.
        image_sequence = image_sequence.view(
            3, self.sequence_length, self.input_shape[-2], self.input_shape[-1]
        )
        return image_sequence, target

    def __len__(self):
        return len(self.sequences)
        

# **Create the Test Dataset and DataLoader for Images**

In [None]:
import gc
import time


# Time how long it takes to build the dataset and loader objects.
start = time.time()

# Directory holding one sub-directory of .jpg frames per video.
dataset_path = 'test_frames_gray'
# Directory holding submission_format.csv.
split_path = 'traintestlist'
# Number of frames ImageDataset returns per video.
sequence_length=40

# Frames are resized to img_dim x img_dim.
img_dim = 112

channels = 3
# Size of each encoder's output vector; also passed to Multimodal below.
latent_dim = 512

image_shape = (channels, img_dim, img_dim)


# Define test set
test_dataset_img = ImageDataset(
    dataset_path=dataset_path,
    split_path=split_path,
    split_number=split_number,
    sequence_length=sequence_length,
    input_shape=image_shape,
    training=False,
)
# shuffle=False keeps the CSV order; the testing cell zips this loader with the
# point-cloud loader and relies on both yielding samples in the same order.
test_dataloader_img = DataLoader(test_dataset_img, batch_size=batch_size, shuffle=False, num_workers=4)


endtime = time.time()

print("Elapsed time : " + str(endtime-start))

# As the last expression of the cell, the return value of gc.collect() is
# shown as the cell output.
gc.collect() 

Elapsed time : 0.031213998794555664


74

In [None]:
# Sanity check: both loaders must report the same number of batches, because
# the testing cell iterates over them in lockstep with zip().
print(f"Length of test image loader {len(test_dataloader_img)}, Length of test pt cld loader {len(test_dataloader_vox)}")


Length of test image loader 187, Length of test pt cld loader 187


# **Multimodal Model**

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.video import r2plus1d_18
from torchvision.models.video import r3d_18

import os
import sys



##############################
#     Encoder for Image
##############################

class Encoder(nn.Module):
    """Image-stream encoder built from the child modules of ``r2plus1d_18``.

    The backbone's first three children run without gradient tracking; the
    next three children run normally, each preceded by the same dropout
    layer. The flattened result is projected to ``latent_dim`` by ``final``.

    Args:
        latent_dim: Size of the returned feature vector.
    """

    def __init__(self, latent_dim):
        super().__init__()
        backbone = r2plus1d_18(pretrained=True)
        stages = list(backbone.children())
        self.dropout1 = nn.Dropout(0.2)
        # Attribute names are part of the checkpoint's state-dict keys.
        self.feature_extractor = nn.Sequential(*stages[:3])
        self.feature_extractor_new1 = nn.Sequential(*stages[3:4])
        self.feature_extractor_new2 = nn.Sequential(*stages[4:5])
        self.feature_extractor_new3 = nn.Sequential(*stages[5:6])
        self.final = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(backbone.fc.in_features, latent_dim),
            nn.BatchNorm1d(latent_dim, momentum=0.01),
        )

    def forward(self, x):
        """Encode a batch of clips into ``(batch, latent_dim)`` features."""
        # No gradients flow through the first three backbone children.
        with torch.no_grad():
            x = self.feature_extractor(x)

        later_stages = (
            self.feature_extractor_new1,
            self.feature_extractor_new2,
            self.feature_extractor_new3,
        )
        for stage in later_stages:
            x = stage(self.dropout1(x))

        flat = x.view(x.size(0), -1)
        return self.final(flat)


##############################
#   Encoder For Point Cloud
##############################

class Encoder_pt(nn.Module):
    """Point-cloud-stream encoder: a full ``r3d_18`` followed by a projection head.

    The complete backbone (including its own ``fc`` layer) is applied, so the
    head's input size is the backbone's ``fc.out_features``.

    Args:
        latent_dim: Size of the returned feature vector.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.feature_extractor_pt = r3d_18(pretrained=True)
        backbone_out = self.feature_extractor_pt.fc.out_features
        self.final_pt = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(backbone_out, latent_dim),
            nn.BatchNorm1d(latent_dim, momentum=0.01),
        )

    def forward(self, x):
        """Encode a batch of voxel grids into ``(batch, latent_dim)`` features."""
        features = self.feature_extractor_pt(x)
        flat = features.view(features.size(0), -1)
        return self.final_pt(flat)


##############################
#      MultiModal Model
##############################

#dim=-1 is the right most dimension

class Multimodal(nn.Module):
    """Two-stream classifier that fuses image and point-cloud features.

    Each stream is encoded to ``latent_dim`` features; the two vectors are
    concatenated and classified by a small MLP:
    ``2 * latent_dim -> hidden_dim -> hidden_dim / 2 -> num_classes``.

    Args:
        num_classes: Number of output logits.
        latent_dim: Feature size produced by each encoder.
        hidden_dim: Width of the first hidden layer of the classifier head.
    """

    def __init__(
        self, num_classes, latent_dim=512, hidden_dim=1024
    ):
        super(Multimodal, self).__init__()
        self.encoder = Encoder(latent_dim)
        self.encoder_pt = Encoder_pt(latent_dim)
        # forward() concatenates the two latent vectors, so the head's input
        # width follows latent_dim instead of being fixed at 1024.
        fused_dim = 2 * latent_dim
        hidden_dim2 = int(hidden_dim / 2)
        self.output_layers_final = nn.Sequential(
            nn.Linear(fused_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim, momentum=0.01),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_dim, hidden_dim2),
            nn.BatchNorm1d(hidden_dim2, momentum=0.01),
            nn.ReLU(),
            nn.Dropout(0.4),
            # Raw logits; no activation is applied here.
            nn.Linear(hidden_dim2, num_classes),
        )

    def forward(self, x, y):
        """Return ``(batch, num_classes)`` logits.

        Args:
            x: Input batch for the image encoder.
            y: Input batch for the point-cloud encoder.
        """
        x = self.encoder(x)
        y = self.encoder_pt(y)
        # Concatenate the two streams along the feature dimension.
        x = torch.cat((x, y), 1)
        return self.output_layers_final(x)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define network
model = Multimodal(
    num_classes=2,
    latent_dim=latent_dim,
    hidden_dim=512
)

model = model.to(device)


# Run the whole network in half precision.
model.half()
# NOTE(review): the classes defined in this notebook only create BatchNorm1d
# layers, and the 3D video backbones presumably use BatchNorm3d, so this loop
# likely matches no layer. It is left unchanged so inference numerics stay the
# same - confirm the intent before widening the check.
for layer in model.modules():
  if isinstance(layer, nn.BatchNorm2d):
    layer.float()

checkpoint_model = 'weight_3D.pth'
# Add weights from checkpoint model if specified
if checkpoint_model:
    # map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
    model.load_state_dict(torch.load(checkpoint_model, map_location=device))

# Testing

In [None]:
# Collected per batch in plain lists (O(n)) and converted to arrays once at the end.
predicted_labels = []
filenames = []

model.eval()

total_video = len(test_dataloader_img)*batch_size

# zip() stops at the shorter loader, so a length mismatch would silently drop samples.
if len(test_dataloader_img) != len(test_dataloader_vox):
    raise RuntimeError(
        "Image and point-cloud loaders have different lengths: %d vs %d"
        % (len(test_dataloader_img), len(test_dataloader_vox))
    )

for batch_i, ((X1, y), (X2, y2)) in enumerate(zip(test_dataloader_img, test_dataloader_vox)):
    # Both datasets return the file name as their second element; the two
    # streams must describe the same videos in the same order.
    if list(y) != list(y2):
        raise RuntimeError("Image and point-cloud batches are misaligned in batch %d" % batch_i)

    # The model was converted with model.half(), so inputs must be half too.
    image_sequences = X1.to(device).half()
    pt_cloud = X2.to(device).half()

    with torch.no_grad():
        # Get sequence predictions
        predictions = model(image_sequences, pt_cloud)

    # Index of the larger logit is the predicted class.
    predicted = torch.max(predictions, 1)[1]

    filenames.extend(y)
    predicted_labels.extend(predicted.cpu().tolist())

    # "\r" rewrites the same line instead of concatenating progress messages.
    sys.stdout.write(
            "\rTesting -- [Batch %d/%d]"
            % (
                batch_i + 1,
                len(test_dataloader_img),
            )
        )
sys.stdout.write("\n")

filename_array = np.array(filenames)  # file names in CSV order
y_pred = np.array(predicted_labels, dtype=float)  # predicted class per file

#Create the csv
submission_dict = {"filename": filename_array, "stalled": y_pred.astype(int)}

submission_csv = pd.DataFrame(submission_dict)

submission_csv.to_csv("submission_multimodal_ccentrp1.csv", index=False)

tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0], device='cuda:0')
Testing -- [Batch 0/187]tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 1, 0], device='cuda:0')
Testing -- [Batch 1/187]tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
        1, 0, 0, 0], device='cuda:0')
Testing -- [Batch 2/187]tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 

In [None]:
# Download the submission CSV written by the testing cell to the local machine.
from google.colab import files
files.download('submission_multimodal_ccentrp1.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>