In [1]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; paths under this folder are used later on.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys

# Extra packages live on Drive; put their folder on the import search path.
PYTORCH3D_PACKAGES_DIR = "/content/drive/My Drive/GoogleColab/pytorch3d_packages"
sys.path.append(PYTORCH3D_PACKAGES_DIR)

In [6]:
# Imports
import os, glob, numpy as np, zipfile
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

# Paths
zip_path = "/content/drive/MyDrive/GoogleColab/ShapeNetCore.zip"
extract_path = "/content/ShapeNetCore/ShapeNetCore"

# Unpack the archive only when the dataset folder is not there yet
dataset_present = os.path.exists(extract_path)
if not dataset_present:
    # The archive is unpacked into the parent of extract_path
    # (presumably the zip carries a top-level "ShapeNetCore" folder - verify against the archive).
    unpack_dir = os.path.dirname(extract_path)
    print(f"Extracting {zip_path} to {extract_path} ...")
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(unpack_dir)
    print("Extraction completed.")

# ClassNames
# Synset ID (used as the class folder name) -> human-readable label.
SYNSET_TO_NAME = dict((
    ("02808440", "bathtub"),
    ("03642806", "laptop"),
    ("02992529", "cellphone"),
    ("03211117", "display"),
    ("03046257", "clock"),
))

# Verify extraction: show what sits directly inside the dataset folder
top_level_ids = os.listdir(extract_path)
# Entries without a known synset ID are shown under their raw name
top_level_names = list(map(lambda entry: SYNSET_TO_NAME.get(entry, entry), top_level_ids))
print("Top-level classes:", top_level_names)

# Binvox Reader
def read_binvox(filepath):
    """Read a run-length-encoded .binvox file and return its voxel grid.

    Args:
        filepath: Path of the .binvox file to read.

    Returns:
        A boolean numpy array reshaped to the sizes given on the header's
        ``dim`` line. Voxels stay in the order stored in the file; no axis
        transpose is applied.

    Raises:
        IOError: If the first line does not start with ``#binvox``, if the
            file ends before a ``data`` line is seen, or if no ``dim`` line
            was found in the header.
        ValueError: Raised by numpy when the decoded voxel count does not fit
            the header dimensions.
    """
    with open(filepath, "rb") as f:
        line = f.readline().decode("ascii").strip()
        if not line.startswith("#binvox"):
            raise IOError("Not a binvox file")
        # translate/scale are parsed (so malformed values still fail loudly) but not used here
        dims, translate, scale = None, None, None
        while True:
            raw_line = f.readline()
            if not raw_line:
                # readline() yields b"" at EOF; without this guard a header that
                # lacks the "data" marker would spin in this loop forever.
                raise IOError("Unexpected end of file before binvox data section")
            line = raw_line.decode("ascii").strip()
            if line.startswith("dim"):
                dims = list(map(int, line.split()[1:]))
            elif line.startswith("translate"):
                translate = list(map(float, line.split()[1:]))
            elif line.startswith("scale"):
                scale = float(line.split()[1])
            elif line.startswith("data"):
                break
        if dims is None:
            raise IOError("Missing dimensions in binvox")

        # The payload is a sequence of (value, run length) byte pairs; expand
        # every run and view the result as a grid of the header's dimensions.
        raw_data = np.frombuffer(f.read(), dtype=np.uint8)
        values, counts = raw_data[0::2], raw_data[1::2]
        voxels = np.repeat(values, counts).astype(np.bool_)
        return voxels.reshape(dims)

# Dataset
class ShapeNetVoxDataset(Dataset):
    """Dataset of .binvox voxel grids laid out as one sub-folder per class.

    Every direct sub-folder of ``root_dir`` is treated as a class. Class
    folders are sorted by name and numbered from 0 in that order. Within a
    class, .binvox files are collected recursively and kept when the variant
    name occurs in their path below the class folder.
    """

    def __init__(self, root_dir, variant="surface"):
        """
        root_dir: folder that contains the class sub-folders
        variant: "surface" or "solid"
        """
        self.root_dir = root_dir
        self.items, self.classes = [], []
        self.variant = variant

        # Identifies all class folders, sort them alphabetically, and assign a numeric label to each for training
        class_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        class_dirs.sort()
        self.classes = class_dirs
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}

        # Collects all .binvox files for each class and prints out how many are found
        for c in self.classes:
            class_dir = os.path.join(root_dir, c)
            # Escape the folder part so characters like "[" in the path are not read as glob syntax
            pattern = os.path.join(glob.escape(class_dir), "**", "*.binvox")
            # glob gives no ordering guarantee; sorting keeps the item order
            # (and therefore any seeded split) stable across runs and machines.
            for path in sorted(glob.glob(pattern, recursive=True)):
                # Match the variant only against the part below the class folder,
                # so a root/parent folder that happens to contain "surface" or
                # "solid" cannot let every file through the filter.
                if variant in os.path.relpath(path, class_dir):
                    self.items.append((path, self.class_to_idx[c]))
            print(f"[DEBUG] Class {SYNSET_TO_NAME.get(c, c)}: found {len(self.items)} items so far")

    # Return the number of items in the dataset
    def __len__(self):
        return len(self.items)

    # Loads a voxel file, converts it into a tensor, resizes it into 32³, and returns its label
    def __getitem__(self, idx):
        """Return ``(vox, label)`` for item ``idx``.

        ``vox`` is a float tensor of shape (1, 32, 32, 32): a channel axis is
        added, then the grid is resampled with nearest-neighbour interpolation.
        ``label`` is the integer class index.
        """
        filepath, label = self.items[idx]
        vox = read_binvox(filepath)
        vox = torch.from_numpy(vox).float().unsqueeze(0)  # add the channel axis
        # interpolate expects a batch axis as well; add it for the call and drop it afterwards
        vox = F.interpolate(vox.unsqueeze(0), size=(32,32,32), mode="nearest").squeeze(0)
        return vox, label

# 3DCNN
class Small3DCNN(nn.Module):
    """Small 3D convolutional classifier for single-channel voxel grids.

    Three conv -> ReLU -> max-pool stages are followed by two fully connected
    layers. The first linear layer expects 128 * 4 * 4 * 4 features, i.e. a
    32x32x32 input halved three times by the pooling.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Convolution stack: 1 -> 32 -> 64 -> 128 channels, padding keeps the spatial size
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        # One pooling module shared by all stages; each use halves every spatial dimension
        self.pool = nn.MaxPool3d(kernel_size=2)
        # Classifier head: flattened features -> 512 -> class scores
        self.fc1 = nn.Linear(in_features=128 * 4 * 4 * 4, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of voxel grids."""
        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(F.relu(conv(features)))
        # Collapse channel and spatial axes into one feature vector per sample
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Training Loop
def run_training(dataset_root=extract_path, epochs=5, batch_size=8, lr=1e-3, variant="surface", seed=None):
    """Train a Small3DCNN on the voxel dataset and print validation metrics per epoch.

    Args:
        dataset_root: Folder with one sub-folder per class, passed to ShapeNetVoxDataset.
        epochs: Number of passes over the training split.
        batch_size: Mini-batch size used by both data loaders.
        lr: Learning rate of the Adam optimizer.
        variant: Voxelisation variant forwarded to ShapeNetVoxDataset.
        seed: Optional integer. When given, torch's global RNG is seeded before
            the split, so the train/validation partition, the weight
            initialisation and the shuffling order repeat across runs
            (GPU kernels may still add small non-determinism). The default
            ``None`` leaves the RNG untouched.

    Returns:
        The model as it is after the last epoch.

    Raises:
        RuntimeError: If the dataset contains no items.
    """
    if seed is not None:
        # Seed before anything draws random numbers (split, weight init, shuffling)
        torch.manual_seed(seed)

    dataset = ShapeNetVoxDataset(dataset_root, variant=variant) # Applies custom loader
    if len(dataset) == 0:
        raise RuntimeError("Dataset is empty — check if .binvox files exist!")

    # Split the data into training and validation (80/20)
    n_classes = len(dataset.classes)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Use GPU if possible, otherwise use CPU
    model = Small3DCNN(num_classes=n_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        # Training pass: one optimizer step per mini-batch
        model.train()
        for vox, labels in train_loader:
            vox, labels = vox.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(vox)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation pass: no gradients, accumulate loss and accuracy
        model.eval()
        val_loss, correct, total = 0, 0, 0
        with torch.no_grad():
            for vox, labels in val_loader:
                vox, labels = vox.to(device), labels.to(device)
                outputs = model(vox)
                loss = criterion(outputs, labels)
                # criterion returns the batch mean; weight it by batch size so the
                # final division gives a per-sample average even with a short last batch
                val_loss += loss.item() * vox.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        val_acc = 100. * correct / total

        # Print only validation loss and accuracy
        print(f"Epoch {epoch+1}/{epochs} - ValLoss: {val_loss/len(val_loader.dataset):.4f}, ValAcc: {val_acc:.2f}%")

    return model

# Train the 3D shape classifier with the settings below
training_options = dict(epochs=5, batch_size=8, lr=1e-3, variant="surface")
model = run_training(**training_options)


Top-level classes: ['bathtub', 'laptop', 'cellphone', 'display', 'clock']
[DEBUG] Class bathtub: found 856 items so far
[DEBUG] Class cellphone: found 1686 items so far
[DEBUG] Class clock: found 2337 items so far
[DEBUG] Class display: found 3427 items so far
[DEBUG] Class laptop: found 3887 items so far
Epoch 1/5 - ValLoss: 0.4270, ValAcc: 88.30%
Epoch 2/5 - ValLoss: 0.2864, ValAcc: 90.49%
Epoch 3/5 - ValLoss: 0.3141, ValAcc: 91.26%
Epoch 4/5 - ValLoss: 0.3625, ValAcc: 91.13%
Epoch 5/5 - ValLoss: 0.4320, ValAcc: 91.90%
