In [None]:
# PointNet++ Custom Dataset Training Notebook
# ===========================================

########################################
# 1. Setup and Imports
########################################
import os
import sys

# Make the project root (the parent of the current working directory)
# importable so the `src.*` imports in this cell resolve.
# The membership check keeps the cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

########################################
# Plant-Only to Plant-With-Noise Preprocessing Notebook
########################################

import os
import numpy as np
import open3d as o3d
import torch
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split

# Import custom modules
from src.data_processing.data_augmentation import add_random_noise_points
from src.data_processing.data_preprocessing import (
    voxel_down_sample_with_indices,
    adjust_point_count_with_indices,
    normalize_point_cloud,
)
from src.utils.data_utils import load_point_cloud  # Ensure this is implemented
from src.models.pointnetplusplus import PointNetPlusPlus
from src.models.pointnet2_utils import PointNetSetAbstraction, PointNetSetAbstractionMsg, PointNetFeaturePropagation


In [2]:

########################################
# 1b. Paths and Parameters
########################################

#project_root = '.'  # Adjust if needed
# --- Input locations (raw plant-only scans) ---
raw_root = os.path.join(project_root, 'data', 'raw')
plant_only_dir = os.path.join(raw_root, 'plant_only')

# --- Output locations (processed .npz files and their train/val/test copies) ---
processed_root = os.path.join(project_root, 'data', 'processed')
output_dir = os.path.join(processed_root, 'plant_only_with_noise')
splits_dir = os.path.join(processed_root, 'splits')

# Create every output folder up front so later cells can write into them.
os.makedirs(output_dir, exist_ok=True)
for d in ('train', 'val', 'test'):
    os.makedirs(os.path.join(splits_dir, d), exist_ok=True)

# --- Tunable parameters ---
num_noise_points = 5000   # forwarded to add_random_noise_points via noise_params
voxel_size = 0.005        # voxel edge length used for downsampling; adjust as needed
num_points = 2048         # target point count per processed cloud
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1  # must sum to 1.0

# Keyword arguments unpacked into add_random_noise_points.
noise_params = dict(
    num_noise_points=num_noise_points,
    color_options=[
        (0.0, 0.0, 1.0),  # Blue
        (0.0, 0.0, 0.0),  # Black
        (1.0, 1.0, 1.0),  # White
    ],
    extend_ratio=0.1,
    noise_below_ratio=0.7,
)


In [None]:
########################################
# 2. List all plant-only .ply files
########################################

def load_ply_files(directory):
    """Recursively collect the paths of all ``.ply`` files under ``directory``.

    Args:
        directory (str): Root folder to walk.

    Returns:
        list[str]: Matching file paths (each joined onto the folder it was
        found in), sorted lexicographically. Empty when nothing matches.
    """
    ply_paths = [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory)
        for name in names
        if name.endswith('.ply')
    ]
    ply_paths.sort()
    return ply_paths

# Gather every raw plant-only scan under plant_only_dir (searched recursively)
# and report how many were found; this list drives the processing cell.
plant_files = load_ply_files(plant_only_dir)
print(f"Found {len(plant_files)} plant-only .ply files")

In [None]:
########################################
# 3. Process Each File: Add noise, downsample, adjust count, normalize
########################################

def _report_label_counts(stage, labels):
    """Print the unique label values and the plant (1) / noise (0) counts for one stage."""
    print(f"{stage}:")
    print("Unique labels:", np.unique(labels))
    print("Plant points:", np.sum(labels == 1))
    print("Noise points:", np.sum(labels == 0))


def process_file(ply_path, output_dir, num_points, voxel_size, noise_params):
    """Turn one plant-only ``.ply`` scan into a labelled, noise-augmented ``.npz``.

    Pipeline: read the cloud, add synthetic noise points, voxel-downsample,
    adjust to ``num_points`` points, normalize, then save ``points`` and
    ``labels`` arrays. Label counts are printed after each stage that can
    change them.

    Args:
        ply_path (str): Path of the source ``.ply`` file.
        output_dir (str): Folder that receives ``<basename>.npz``; created if missing.
        num_points (int): Target point count handed to ``adjust_point_count_with_indices``.
        voxel_size (float): Voxel size handed to ``voxel_down_sample_with_indices``.
        noise_params (dict): Keyword arguments forwarded to ``add_random_noise_points``.

    Returns:
        str: Path of the written ``.npz`` file.

    Raises:
        ValueError: If no points could be read from ``ply_path``.
    """
    # Load original plant-only point cloud.
    pcd = o3d.io.read_point_cloud(ply_path)
    points = np.asarray(pcd.points, dtype=np.float32)
    # Open3D signals an unreadable/missing file with a warning and an empty
    # cloud rather than an exception, so fail loudly here instead of pushing
    # an empty array through the rest of the pipeline.
    if points.shape[0] == 0:
        raise ValueError(f"No points could be read from {ply_path!r}")
    # NOTE(review): colors is None for clouds without colors; confirm that
    # add_random_noise_points accepts None.
    colors = np.asarray(pcd.colors, dtype=np.float32) if pcd.has_colors() else None

    # Add noise points: returns combined points, combined colors and per-point
    # labels (1 = plant, 0 = noise).
    combined_points, combined_colors, labels = add_random_noise_points(points, colors, **noise_params)
    _report_label_counts("After noise addition", labels)

    # Wrap in an Open3D cloud for the Open3D-based preprocessing helpers.
    noisy_pcd = o3d.geometry.PointCloud()
    noisy_pcd.points = o3d.utility.Vector3dVector(combined_points)
    noisy_pcd.colors = o3d.utility.Vector3dVector(combined_colors)

    # Voxel downsample; the returned indices select the labels of the kept points.
    downsampled_pcd, downsampled_indices = voxel_down_sample_with_indices(noisy_pcd, voxel_size)
    downsampled_labels = labels[downsampled_indices]
    _report_label_counts("After voxel downsample", downsampled_labels)

    # Adjust point count; the helper returns the matching labels itself.
    adjusted_pcd, adjusted_labels = adjust_point_count_with_indices(downsampled_pcd, downsampled_labels, num_points)
    _report_label_counts("After adjusting point count", adjusted_labels)

    # Normalize and extract the final coordinates (colors are not saved).
    normalized_pcd = normalize_point_cloud(adjusted_pcd)
    final_points = np.asarray(normalized_pcd.points, dtype=np.float32)

    # Save to npz, named after the source file.
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(ply_path))[0]
    out_path = os.path.join(output_dir, base_name + '.npz')
    np.savez(out_path, points=final_points, labels=adjusted_labels)
    return out_path

# Run the full preprocessing pipeline on every discovered scan and remember
# where each resulting .npz was written.
processed_files = []
for ply_file in plant_files:
    out_path = process_file(
        ply_path=ply_file,
        output_dir=output_dir,
        num_points=num_points,
        voxel_size=voxel_size,
        noise_params=noise_params,
    )
    processed_files += [out_path]

print(f"Processed {len(processed_files)} files into {output_dir}")

In [None]:
########################################
# 4. Splitting into Train/Val/Test
########################################

# We have processed_files list of npz
# We want to split into train/val/test by given ratios

# Stage 1: carve off the training share; everything else is held out.
# random_state is fixed so the split is the same on every run.
train_files, temp_files = train_test_split(
    processed_files,
    train_size=train_ratio,
    random_state=42,
)

# Stage 2: divide the held-out files between val and test. val_size is the
# validation share *of the held-out portion*, not of the whole dataset.
val_size = val_ratio / (val_ratio + test_ratio)
val_files, test_files = train_test_split(
    temp_files,
    train_size=val_size,
    random_state=42,
)

print(f"Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")

def copy_files(file_list, target_dir):
    """Copy every file in ``file_list`` into the directory ``target_dir``.

    ``target_dir`` is created first if it does not exist. Without that,
    ``shutil.copy`` would interpret a missing ``target_dir`` as a destination
    *file name*, so each source would overwrite the previous one (or the call
    would fail outright when the parent folder is missing too).

    Args:
        file_list (Iterable[str]): Paths of the files to copy.
        target_dir (str): Destination directory; files keep their base names.
    """
    os.makedirs(target_dir, exist_ok=True)
    for src_path in file_list:
        shutil.copy(src_path, target_dir)

# Materialise each split as a folder of .npz copies under splits_dir.
for split_name, split_files in (
    ('train', train_files),
    ('val', val_files),
    ('test', test_files),
):
    copy_files(split_files, os.path.join(splits_dir, split_name))

In [None]:
########################################
# 5. Verification
########################################

# Let's load one example from train and print stats
# Spot-check the first training file: array shapes, label values and class balance.
sample_file = train_files[0]
data = np.load(sample_file)
points, labels = data['points'], data['labels']

print("Verification of a sample:")
print("Points shape:", points.shape)
print("Labels shape:", labels.shape)
print("Unique labels:", np.unique(labels))
print("Number plant points:", (labels == 1).sum())
print("Number noise points:", (labels == 0).sum())


In [4]:
def visualize_npz_file(npz_path, title='Point Cloud'):
    """
    Show a processed point cloud in an Open3D window, colored by label.

    Points labelled 1 (plant) are drawn green and points labelled 0 (noise)
    red; any other label value keeps the black default.

    Args:
        npz_path (str): Path to an .npz file holding 'points' and 'labels'.
        title (str): Title for the visualization window.
    """
    archive = np.load(npz_path)
    xyz = archive['points']            # (N, 3)
    point_labels = archive['labels']   # (N,)

    # Start from all-black and paint each class; the two masks are disjoint.
    rgb = np.zeros((xyz.shape[0], 3))
    rgb[point_labels == 0] = [1, 0, 0]  # Red for noise
    rgb[point_labels == 1] = [0, 1, 0]  # Green for plant

    # Assemble the Open3D cloud and hand it to the viewer.
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(xyz)
    cloud.colors = o3d.utility.Vector3dVector(rgb)
    o3d.visualization.draw_geometries([cloud], window_name=title)

# Example usage: visualize one processed file from output_dir.
# NOTE: the file name below is hard-coded; np.load inside visualize_npz_file
# fails if it is not present in output_dir, so point it at one of your own
# processed .npz files.
sample_npz = os.path.join(output_dir, 'Wheat_Gladius_B6_2023-06-27-2029_fused_output.npz')  # Replace with your sample file
visualize_npz_file(sample_npz, title='Sample Plant-With-Noise Point Cloud')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Pick the compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Build the segmentation network and move it onto the chosen device.
num_classes = 2  # 0=noise, 1=plant
model = PointNetPlusPlus(num_classes=num_classes).to(device)
print("Model initialized.")

# NLLLoss consumes log-probabilities, so the training/validation loops apply
# log_softmax to the model output before calling the criterion.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# 6. Create Custom PyTorch Dataset

class ProcessedPointCloudDataset(Dataset):
    """Dataset over a folder tree of processed ``.npz`` point clouds.

    Each item is read from one ``.npz`` archive that must contain a
    ``points`` array and a ``labels`` array.
    """

    def __init__(self, directory):
        """
        Initializes the dataset by listing all .npz files in the directory.

        Args:
            directory (str): Directory containing .npz files (searched recursively).
        """
        # Sorted so that index -> file mapping is stable between runs.
        self.files = sorted(
            os.path.join(root, fname)
            for root, _dirs, fnames in os.walk(directory)
            for fname in fnames
            if fname.endswith('.npz')
        )
        print(f"Dataset initialized with {len(self.files)} files.")

    def __len__(self):
        """Number of .npz files found."""
        return len(self.files)

    def __getitem__(self, idx):
        """
        Retrieves the points and labels from a single .npz file.

        Args:
            idx (int): Index of the file to retrieve.

        Returns:
            Tuple[np.ndarray, np.ndarray]: The ``points`` and ``labels`` arrays
            exactly as stored in the archive. They are NumPy arrays, not
            tensors; a DataLoader's default collation converts them when
            batching.
        """
        # np.load returns a lazily-read archive that holds an open file handle.
        # Read both arrays inside the context manager so the handle is closed
        # for every item instead of leaking one descriptor per access.
        with np.load(self.files[idx]) as data:
            points = data['points']  # (N, 3) as written by the preprocessing step
            labels = data['labels']  # (N,)
        return points, labels



# One dataset per split folder, built in train/val/test order.
train_dataset, val_dataset, test_dataset = (
    ProcessedPointCloudDataset(os.path.join(splits_dir, split))
    for split in ('train', 'val', 'test')
)

# All loaders share batch size and worker count; only training data is shuffled.
loader_options = {'batch_size': 8, 'num_workers': 0}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

print(f"\nTrain samples: {len(train_dataset)}, Val samples: {len(val_dataset)}, Test samples: {len(test_dataset)}")


In [6]:
from tqdm import tqdm

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one training pass over ``loader`` and return the mean batch loss.

    The model output is passed through ``log_softmax`` over its last axis
    before the criterion is applied, so ``criterion`` should expect
    log-probabilities (e.g. ``nn.NLLLoss``).

    Args:
        model: Network called as ``model(points)``.
            Assumes it returns per-point class scores with the classes on the
            last axis, i.e. (B, N, num_classes) -- TODO confirm against the model.
        loader: Iterable of ``(points, labels)`` batches that supports ``len()``.
        criterion: Loss called as ``criterion(log_probs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        float: Sum of per-batch losses divided by ``len(loader)``.

    Raises:
        ZeroDivisionError: If ``loader`` has length zero.
    """
    model.train()
    running_loss = 0.0
    for points, labels in tqdm(loader, desc="Training", leave=False):
        # Move data to device with the dtypes the model and loss need.
        points = points.float().to(device)
        labels = labels.long().to(device)

        # Forward pass.
        optimizer.zero_grad()
        log_probs = F.log_softmax(model(points), dim=-1)

        # Flatten to one row per point. The class count is read from the
        # tensor itself rather than from a notebook-level global, and
        # reshape (unlike view) also accepts non-contiguous tensors.
        log_probs = log_probs.reshape(-1, log_probs.size(-1))
        labels = labels.reshape(-1)

        # Loss, backward pass and parameter update.
        loss = criterion(log_probs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)

def validate_one_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    The model output is passed through ``log_softmax`` over its last axis
    before the criterion is applied, so ``criterion`` should expect
    log-probabilities (e.g. ``nn.NLLLoss``).

    Args:
        model: Network called as ``model(points)``.
            Assumes it returns per-point class scores with the classes on the
            last axis, i.e. (B, N, num_classes) -- TODO confirm against the model.
        loader: Iterable of ``(points, labels)`` batches that supports ``len()``.
        criterion: Loss called as ``criterion(log_probs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple[float, float]: Mean batch loss, and per-point accuracy in percent.

    Raises:
        ZeroDivisionError: If ``loader`` has length zero.
    """
    model.eval()
    running_loss = 0.0
    total_correct = 0
    total_points = 0
    with torch.no_grad():
        for points, labels in tqdm(loader, desc="Validation", leave=False):
            # Move data to device with the dtypes the model and loss need.
            points = points.float().to(device)
            labels = labels.long().to(device)

            # Forward pass.
            log_probs = F.log_softmax(model(points), dim=-1)

            # Flatten to one row per point. The class count is read from the
            # tensor itself rather than from a notebook-level global, and
            # reshape (unlike view) also accepts non-contiguous tensors.
            log_probs = log_probs.reshape(-1, log_probs.size(-1))
            labels = labels.reshape(-1)

            running_loss += criterion(log_probs, labels).item()

            # Accuracy: the predicted class is the highest log-probability.
            preds = torch.argmax(log_probs, dim=1)
            total_correct += (preds == labels).sum().item()
            total_points += labels.numel()

    average_loss = running_loss / len(loader)
    accuracy = 100.0 * total_correct / total_points
    return average_loss, accuracy
