In [None]:
# Mount Google Drive at /content/drive; every data path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
 # Folder containing the saved Train_Set files; the last expression displays how many entries it holds.
 import os
 path="/content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/"
 len(os.listdir(path))

3132

In [None]:
import glob

# Every direct entry of the folder (glob does not descend into subdirectories).
files = glob.glob(os.path.join(path, '*'))

# Remove zero-byte regular files and report each removal.
for file in files:
    # Skip anything that is not a regular file, or that has content.
    if not os.path.isfile(file) or os.path.getsize(file) != 0:
        continue
    os.remove(file)
    print(f"Deleted empty file: {file}")


Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bed_181.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/monitor_1999.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bed_123.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bathtub_19.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/sofa_2935.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bathtub_8.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bed_112.pt.gz
Deleted empty file: /content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/bathtub_10.pt.gz
D

In [None]:
# Re-count the folder entries after the empty files were removed.
len(os.listdir(path))

3100

In [None]:
!pip install dask==2022.7.0
!pip install torch
!pip install torchvision
!pip install trimesh
!pip install matplotlib


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install open3d

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Make the ModelNet10 data folder the working directory of the notebook.
%cd /content/drive/MyDrive/Voxel_Grid_classifier/Data/ModelNet10

/content/drive/MyDrive/Voxel_Grid_classifier/Data/ModelNet10


In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import trimesh
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from numpy import random
import torch.nn as nn
import torch.optim as optim
import gzip
import open3d as o3d

In [None]:

class Mesh:
    """Plain container handed from one transform step to the next.

    Attributes:
        vertices: data produced by the current step.
        faces: face data, or None when the step does not carry faces along.
    """

    def __init__(self, vertices, faces):
        self.vertices, self.faces = vertices, faces

class ToMesh:
    """Transform step: copy a loaded mesh's geometry into a plain `Mesh` of NumPy arrays."""

    def __call__(self, mesh):
        """Convert `mesh` (any object exposing `.vertices` and `.faces`) to a `Mesh`.

        Returns:
            Mesh whose `vertices` is a float32 array and whose `faces` is an
            int64 array of vertex indices.
        """
        vertices = np.asarray(mesh.vertices, dtype=np.float32)
        # Faces are indices into `vertices`, so they must stay integral:
        # float32 cannot represent every integer above 2**24 and cannot be
        # used for indexing without a cast.
        faces = np.asarray(mesh.faces, dtype=np.int64)
        return Mesh(vertices, faces)

def triangle_area(point1, point2, point3):
    """Return the area of the triangle spanned by the three given vertices."""
    edge_a = point2 - point1
    edge_b = point3 - point1
    # The cross product's length is the parallelogram area; the triangle is half of it.
    normal = np.cross(edge_a, edge_b)
    return 0.5 * np.linalg.norm(normal)

def points_in_triangle(point1, point2, point3, num_points):
    """Generate `num_points` random points inside the triangle (point1, point2, point3).

    Two uniform numbers (u, v) are drawn per point. Pairs with u + v > 1 fall
    outside the barycentric triangle; they are reflected to (1 - u, 1 - v)
    instead of being discarded, so exactly `num_points` points are returned.

    Returns:
        Array of shape (num_points, 3) when the vertices are 3-D points.
    """
    u = np.random.uniform(0, 1, num_points)
    v = np.random.uniform(0, 1, num_points)
    # Fold the samples that landed in the other half of the unit square back
    # into the triangle rather than throwing them away.
    outside = u + v > 1
    u = np.where(outside, 1 - u, u)
    v = np.where(outside, 1 - v, v)
    w = 1 - u - v
    # Barycentric combination of the three vertices.
    return (u[:, None] * point1) + (v[:, None] * point2) + (w[:, None] * point3)

class PointCloud:
    """Transform step: densify a mesh by sampling extra points on its faces."""

    def __init__(self, faces=None, vertices=None, density=10):
        """Store optional initial geometry and the sampling density.

        Args:
            faces: rows of three vertex indices (one row per triangle).
            vertices: array of vertex coordinates.
            density: number of sampled points per unit of face area.
        """
        self.faces = faces
        self.vertices = vertices
        self.density = density

    def generate_new_points(self, density, smallest=0.02):
        """Sample points on every face whose area exceeds `smallest`.

        Args:
            density: requested points per unit area; `int(density * area)`
                points are requested for each sampled face.
            smallest: faces with an area not greater than this are skipped.

        Returns:
            Mesh whose `vertices` are the original vertices followed by the
            sampled points, and whose `faces` are the original faces.
        """
        vertices = np.asarray(self.vertices)
        # Faces may arrive stored as floats; indexing needs integers.
        faces = np.asarray(self.faces).astype(np.intp)

        # Collect the chunks and concatenate once; growing an array inside the
        # loop copies everything on each iteration. The empty float64 chunk
        # keeps the result dtype float64 even when no face is sampled.
        chunks = [vertices, np.empty((0, 3))]
        for face in faces:
            point1 = vertices[face[0]]
            point2 = vertices[face[1]]
            point3 = vertices[face[2]]

            area = triangle_area(point1, point2, point3)
            if area > smallest:
                number_of_points = int(density * area)
                chunks.append(points_in_triangle(point1, point2, point3, number_of_points))

        # The second Mesh field is the face list, not the vertex array.
        return Mesh(np.concatenate(chunks), self.faces)

    def __call__(self, mesh):
        """Densify `mesh` using the density given at construction time."""
        self.faces = mesh.faces
        self.vertices = mesh.vertices
        return self.generate_new_points(self.density)

class Rescale:
    """Transform step: scale the points so they fit inside a grid of edge `size` centred on the origin."""

    def __init__(self, size, scaling_param=1.1):
        """
        Args:
            size: edge length of the target grid.
            scaling_param: safety margin; values above 1 keep the points
                strictly inside the half-extent `size / 2`.
        """
        self.size = size
        self.scaling_param = scaling_param

    def __call__(self, mesh):
        """Return a Mesh whose vertices satisfy |coordinate| <= size / (2 * scaling_param)."""
        vertices = np.asarray(mesh.vertices)
        # Normalise by the largest absolute coordinate. Using the plain maximum
        # lets negative coordinates exceed the half-extent (or flips the sign
        # when every coordinate is negative), so points end up outside the grid.
        extent = np.max(np.abs(vertices))
        if extent == 0:
            # All points sit on the origin: nothing to scale, avoid dividing by zero.
            return Mesh(vertices, None)
        rescaled = vertices * (int(self.size) / 2) / (self.scaling_param * extent)
        return Mesh(rescaled, None)

class Broadcast:
    """Transform step: snap every coordinate onto the integer lattice."""

    def __call__(self, mesh):
        """Return a Mesh whose vertices are the integer-cast input vertices.

        `astype(int)` drops the fractional part, i.e. it rounds toward zero.
        """
        return Mesh(mesh.vertices.astype(int), None)

class Unique:
    """Transform step: keep each distinct point that occurs at least `minimum_count` times."""

    def __init__(self, minimum_count=2):
        """
        Args:
            minimum_count: minimum number of occurrences a point needs to be kept.
        """
        self.minimum_count = minimum_count

    def __call__(self, mesh):
        """Return a Mesh holding one row per sufficiently frequent point.

        Rows come back in the sorted order produced by `np.unique`, and the
        result keeps the input dtype.
        """
        points = np.asarray(mesh.vertices)
        # Row-wise uniqueness; unlike a raw-bytes view this does not require a
        # contiguous array and keeps the column dimension when nothing is selected.
        unique_rows, counts = np.unique(points, axis=0, return_counts=True)
        return Mesh(unique_rows[counts >= self.minimum_count], None)

class GenerateVoxelGrid:
    """Transform step: rasterise integer points into a cubic occupancy grid."""

    def __init__(self, size):
        """
        Args:
            size: edge length of the cubic grid.
        """
        self.size = size

    def __call__(self, mesh):
        """Return a Mesh whose `vertices` is a (size, size, size) array with 1 at occupied cells.

        Raises:
            IndexError: if any point falls outside the grid after the offset.
                Negative indices are rejected too, because they would
                otherwise wrap around to the opposite side of the grid.
        """
        final_grid = np.zeros((self.size, self.size, self.size))
        points = np.asarray(mesh.vertices)
        if points.size == 0:
            return Mesh(final_grid, None)

        # Shift by half the grid so coordinates centred on the origin become
        # non-negative, then truncate to integer cell indices.
        indices = (points[:, :3] + self.size / 2).astype(int)
        out_of_grid = (indices < 0) | (indices >= self.size)
        if out_of_grid.any():
            raise IndexError(
                f"{int(out_of_grid.any(axis=1).sum())} point(s) fall outside the "
                f"{self.size}^3 voxel grid"
            )

        # Mark every occupied cell in one vectorised assignment.
        final_grid[indices[:, 0], indices[:, 1], indices[:, 2]] = 1
        return Mesh(final_grid, None)

class ToPythorch:
    """Final transform step: hand the NumPy data over to PyTorch."""

    def __call__(self, mesh):
        """Return `mesh.vertices` wrapped as a torch tensor (via `torch.from_numpy`)."""
        return torch.from_numpy(mesh.vertices)

In [None]:
class ModelNet10Dataset(Dataset):
    """Dataset over mesh files laid out as `root_dir/<class_name>/<kind>/<file>`."""

    def __init__(self, root_dir, kind, transform=None):
        """Index every mesh file of the requested split.

        Args:
            root_dir: directory containing one sub-directory per class.
            kind: name of the split folder inside each class directory
                (e.g. "train").
            transform: optional callable applied to each loaded mesh.

        Raises:
            FileNotFoundError: if a class directory has no `kind` sub-directory
                (deliberately not skipped, so labels stay consistent across splits).
        """
        self.root_dir = root_dir
        self.transform = transform
        self.mesh_files = []  # full path of every sample
        self.labels = []      # class index of every sample, parallel to mesh_files
        # Only directories are classes; stray files in the root (README, OS
        # metadata, ...) would otherwise be treated as classes and crash below.
        self.class_names = sorted(
            entry for entry in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, entry))
        )
        for label, class_name in enumerate(self.class_names):
            split_dir = os.path.join(self.root_dir, class_name, kind)
            # os.listdir order is arbitrary; sort so that a sample index always
            # refers to the same file across runs and processes.
            for mesh_file in sorted(os.listdir(split_dir)):
                self.mesh_files.append(os.path.join(split_dir, mesh_file))
                self.labels.append(label)

    def __len__(self):
        """Number of indexed mesh files."""
        return len(self.mesh_files)

    def __getitem__(self, idx):
        """Load sample `idx`.

        Returns:
            Tuple `(mesh, label, class_name)` where `mesh` is the loaded mesh,
            or the transform's output when a transform was given.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        mesh_file = self.mesh_files[idx]
        label = self.labels[idx]
        mesh = trimesh.load(mesh_file)
        if self.transform:
            mesh = self.transform(mesh)
        return mesh, label, self.class_names[label]


#This is the way of building a dataset taught by the professor in lab 02.
#We define a class that inherits from the Dataset class, so it already starts with its functionality.
#First we initialize the class,
#then we define a method that returns the dataset length,
#then we define a method that lets us pick one element by index, applying the transformation when one was given.
#We have written 2 different methods to transform the data to voxel-grid format. NOTE: the class above takes no `method` argument; the transformation to apply is chosen through the `transform` argument of the dataset class.


In [None]:
# Voxelisation pipeline: mesh -> densified point cloud -> rescaled to the grid
# -> integer coordinates -> repeated points only -> occupancy grid -> tensor.
Transformation1 = transforms.Compose(
    [
        ToMesh(),
        PointCloud(density=20),
        Rescale(size=60),
        Broadcast(),
        Unique(),
        GenerateVoxelGrid(size=60),
        ToPythorch(),
    ]
)

# Training split of ModelNet10 with the pipeline applied when a sample is accessed.
training_set_1 = ModelNet10Dataset(
    root_dir='/content/drive/MyDrive/Voxel_Grid_classifier/Data/ModelNet10',
    kind="train",
    transform=Transformation1,
)

In [None]:
import multiprocessing

# Report how many CPU cores this runtime exposes.
num_cores = multiprocessing.cpu_count()
print("Number of CPU cores:", num_cores)

Number of CPU cores: 40


In [None]:
!pip install dask dask[distributed]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import dask
import dask.distributed
import gzip
import os

def process_iteration(i):
    """Transform sample `i` of `training_set_1` and save it as a gzip-compressed file.

    The output file is named `<class_name>_<i>.pt.gz`. A sample whose non-empty
    output file already exists is skipped.

    Args:
        i: index into `training_set_1`.

    Returns:
        `i` when the sample was written, None when it was skipped or failed.
    """
    # The class name comes from the dataset index, so the (expensive)
    # transform is not run just to build the file name.
    name = f"{training_set_1.class_names[training_set_1.labels[i]]}_{i}"
    file_path = "/content/drive/MyDrive/Voxel_Grid_classifier/Data/Transformed_Data/Transform_2/Train_Set/" + name + ".pt.gz"

    # A zero-byte file is the leftover of an earlier failed attempt, not a result.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        return None  # Skip the iteration if the file already exists

    temp_path = file_path + ".tmp"
    try:
        # Compute the sample BEFORE opening any file, so a failing transform
        # cannot leave an empty output file behind.
        sample = training_set_1[i]
        # Compress and save to a temporary file, then move it into place.
        with gzip.open(temp_path, 'wb') as f:
            torch.save(sample, f, pickle_module=torch.serialization.pickle)
        os.replace(temp_path, file_path)
        return i  # Return the index if the iteration completes successfully
    except Exception as exc:
        # Report the cause instead of silently swallowing it; KeyboardInterrupt
        # and SystemExit are no longer caught.
        print(f"Failed to process {name}: {exc!r}")
        if os.path.exists(temp_path):
            os.remove(temp_path)
        return None  # Return None if an exception occurs during the iteration

# Set up Dask client
client = dask.distributed.Client()

try:
    # Create one Dask future per sample, starting at index 0 so the first
    # sample is not left out.
    futures = client.map(process_iteration, range(len(training_set_1)))

    # Compute the results
    results = client.gather(futures)
finally:
    # Shut the client down even when a task or the gather fails.
    client.shutdown()

# Process the results
for result in results:
    if result is not None:
        # Look the class name up in the dataset index; indexing the dataset
        # itself would reload and re-transform the mesh just to print a name.
        class_name = training_set_1.class_names[training_set_1.labels[result]]
        print(f"Tensor {class_name}_{result} saved successfully.")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38463 instead
