In [None]:
import sys
#!{sys.executable} -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
#!{sys.executable} -m pip install numpy scikit-learn tqdm open3d matplotlib

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import StepLR
import numpy as np
import os
from tqdm import tqdm
from collections import defaultdict
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import open3d as o3d
from collections import defaultdict, Counter 

Looking in indexes: https://download.pytorch.org/whl/cu121



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [28]:
import glob

class S3DISDataset(Dataset):
    """Point-cloud dataset built from the ``*.txt`` files found under ``root_dir/area``.

    Every text file is treated as one sample with rows ``x y z r g b label``.
    The sorted file list is shuffled deterministically with ``seed`` and split
    70% / 15% / 15% into train / val / test.

    Args:
        root_dir: Directory that contains the area folders.
        area: Name of the area sub-folder that is searched recursively.
        num_points: Number of points drawn from each file per sample.
        split: ``"train"``, ``"val"``; any other value selects the test part.
        seed: Seed of the shuffle that defines the split.
    """

    def __init__(self, root_dir="C:/Users/anast/ml/CVin3D/lab4/task2/Stanford3dDataset", area="Area_1", num_points=4096, split="train", seed=42):
        self.root_dir = root_dir
        self.area = area
        self.num_points = num_points
        self.split = split

        files = sorted(glob.glob(os.path.join(root_dir, area, "**/*.txt"), recursive=True))
        # A private RandomState yields the same permutation as
        # `np.random.seed(seed); np.random.shuffle(files)` but leaves the global
        # NumPy RNG untouched, so building a dataset no longer resets the
        # randomness used elsewhere (e.g. the per-sample point selection).
        np.random.RandomState(seed).shuffle(files)

        n = len(files)
        train_end = int(0.7 * n)
        val_end = int(0.85 * n)
        if split == "train":
            self.files = files[:train_end]
        elif split == "val":
            self.files = files[train_end:val_end]
        else:  # test
            self.files = files[val_end:]

    def __len__(self):
        """Return the number of files in the selected split."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load one file and return ``(features, labels)``.

        Returns:
            features: float tensor ``(num_points, 6)`` - centred, unit-sphere
                scaled xyz followed by rgb divided by 255.
            labels: long tensor ``(num_points,)``.

        Raises:
            ValueError: if the file has no rows or fewer than 7 columns.
        """
        path = self.files[idx]
        # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
        # returns a 1-D array and the column slicing below fails.
        data = np.loadtxt(path, ndmin=2)  # (N, 7): x,y,z,r,g,b,label
        if data.shape[0] == 0 or data.shape[1] < 7:
            raise ValueError(
                f"{path}: expected at least one row with 7 columns (x y z r g b label), got shape {data.shape}"
            )

        xyz = data[:, :3]
        rgb = data[:, 3:6] / 255.0
        labels = data[:, 6].astype(np.int64)

        # Sample with replacement only when the file has too few points.
        n_available = xyz.shape[0]
        indices = np.random.choice(n_available, self.num_points, replace=n_available < self.num_points)

        xyz = xyz[indices]
        rgb = rgb[indices]
        labels = labels[indices]

        # Centre the sample and scale it into the unit sphere.
        xyz = xyz - np.mean(xyz, axis=0)
        scale = np.max(np.linalg.norm(xyz, axis=1))
        if scale > 0:  # all points identical -> nothing to scale
            xyz = xyz / scale

        features = np.concatenate([xyz, rgb], axis=1)  # (num_points, 6)
        return torch.from_numpy(features).float(), torch.from_numpy(labels).long()

In [29]:
def square_distance(src, dst):
    """Squared Euclidean distance between every point of src and every point of dst.

    Uses the expansion |a - b|^2 = -2 a.b + |a|^2 + |b|^2.

    Args:
        src: (B, N, C)
        dst: (B, M, C)
    Returns:
        dist: (B, N, M)
    """
    batch, n_src, _ = src.shape
    _, n_dst, _ = dst.shape
    src_sq = (src ** 2).sum(dim=-1).view(batch, n_src, 1)
    dst_sq = (dst ** 2).sum(dim=-1).view(batch, 1, n_dst)
    cross = torch.matmul(src, dst.transpose(2, 1))
    # Same summation order as the step-by-step accumulation: cross term first.
    return -2 * cross + src_sq + dst_sq

def index_points(points, idx):
    """Gather points by per-batch indices.

    Args:
        points: (B, N, C)
        idx: (B, npoint) or (B, npoint, nsample)
    Returns:
        new_points: (B, npoint, C) or (B, npoint, nsample, C)
    """
    batch_size, _, _ = points.shape
    # Batch index shaped (idx.shape[0], 1, ..., 1) and broadcast to idx's shape,
    # so that advanced indexing pairs every index with its own batch row.
    singleton_dims = [1] * (idx.dim() - 1)
    batch_indices = (
        torch.arange(batch_size, dtype=torch.long, device=points.device)
        .view([idx.shape[0]] + singleton_dims)
        .expand([batch_size] + list(idx.shape[1:]))
    )
    return points[batch_indices, idx, :]

def farthest_point_sample(xyz, npoint):
    """Farthest Point Sampling (FPS).

    Starts from a random point per batch element and repeatedly selects the
    point whose distance to the already selected set is largest.

    Args:
        xyz: (B, N, 3)
        npoint: int
    Returns:
        centroids: (B, npoint) indices into the N points
    """
    dev = xyz.device
    n_batch, n_pts, n_dim = xyz.shape
    selected = torch.zeros(n_batch, npoint, dtype=torch.long, device=dev)
    # Squared distance of every point to the closest selected point so far.
    min_sq_dist = torch.ones(n_batch, n_pts, device=dev) * 1e10
    current = torch.randint(0, n_pts, (n_batch,), dtype=torch.long, device=dev)
    rows = torch.arange(n_batch, dtype=torch.long, device=dev)
    for step in range(npoint):
        selected[:, step] = current
        anchor = xyz[rows, current, :].view(n_batch, 1, n_dim)
        sq_dist = ((xyz - anchor) ** 2).sum(dim=-1)
        closer = sq_dist < min_sq_dist
        min_sq_dist[closer] = sq_dist[closer]
        _, current = min_sq_dist.max(dim=-1)
    return selected

def query_ball_point(radius, nsample, xyz, new_xyz):
    """Ball query: indices of up to nsample points within radius of each centre.

    Slots for which fewer than nsample points fall inside the ball are filled
    with the first in-ball index of that centre.

    Args:
        radius: float
        nsample: int
        xyz: (B, N, 3) - all points
        new_xyz: (B, npoint, 3) - query centres
    Returns:
        group_idx: (B, npoint, nsample) indices into xyz
    """
    batch, n_all, _ = xyz.shape
    _, n_centers, _ = new_xyz.shape
    candidates = (
        torch.arange(n_all, dtype=torch.long, device=xyz.device)
        .view(1, 1, n_all)
        .repeat([batch, n_centers, 1])
    )
    outside_ball = square_distance(new_xyz, xyz) > radius ** 2
    # Out-of-ball points get the sentinel index N so that sorting pushes them last.
    candidates[outside_ball] = n_all
    ordered, _ = candidates.sort(dim=-1)
    group_idx = ordered[:, :, :nsample]
    first_hit = group_idx[:, :, 0].view(batch, n_centers, 1).repeat([1, 1, nsample])
    is_sentinel = group_idx == n_all
    group_idx[is_sentinel] = first_hit[is_sentinel]
    return group_idx

In [32]:
class PointNetSetAbstraction(nn.Module):
    """PointNet++ set-abstraction layer: sample centres, group neighbours, shared MLP, max-pool.

    Args:
        npoint: number of centres chosen by farthest point sampling.
        radius: ball-query radius used for grouping.
        nsample: number of neighbours kept per centre.
        in_channel: number of input channels of the first 1x1 convolution.
        mlp: output channel sizes of the successive 1x1 convolutions.
        group_all: if True, skip sampling/grouping and pool over all points.
    """

    def __init__(self, npoint, radius, nsample, in_channel, mlp, group_all=False):
        super().__init__()
        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.group_all = group_all
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv2d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm2d(out_channel))
            last_channel = out_channel

    def _shared_mlp(self, feats):
        """Apply every conv -> batch-norm -> ReLU stage to a (B, C, H, W) tensor."""
        # nn.functional is reachable through the `nn` import, so the layer does
        # not depend on an `F` alias being imported by a later notebook cell.
        for conv, bn in zip(self.mlp_convs, self.mlp_bns):
            feats = nn.functional.relu(bn(conv(feats)))
        return feats

    def forward(self, xyz, points):
        """
        xyz: (B, N, 3)
        points: (B, N, C) per-point features (coordinates + attributes), or None
        Returns:
            new_xyz: (B, npoint, 3)
            new_points: (B, npoint, mlp[-1])
            With group_all=True the middle dimension of both outputs is 1.
        """
        if self.group_all:
            # Without features fall back to the raw coordinates instead of crashing.
            feats = xyz if points is None else points
            new_xyz = torch.zeros_like(xyz[:, :1, :])
            feats = feats.transpose(1, 2).unsqueeze(-1)            # (B, C, N, 1)
            feats = self._shared_mlp(feats)
            new_points = torch.max(feats, 2)[0].transpose(1, 2)    # (B, 1, mlp[-1])
            return new_xyz, new_points

        # Sample centres with FPS, then group neighbours with a ball query.
        B, N, C = xyz.shape
        S = self.npoint
        fps_idx = farthest_point_sample(xyz, S)   # (B, S)
        new_xyz = index_points(xyz, fps_idx)      # (B, S, 3)
        idx = query_ball_point(self.radius, self.nsample, xyz, new_xyz)  # (B, S, nsample)
        grouped_xyz = index_points(xyz, idx)                             # (B, S, nsample, 3)
        # Express neighbour coordinates relative to their centre.
        grouped_xyz_norm = grouped_xyz - new_xyz.view(B, S, 1, 3)

        if points is not None:
            grouped_points = index_points(points, idx)  # (B, S, nsample, C)
            grouped_points = torch.cat([grouped_xyz_norm, grouped_points], dim=-1)  # (B, S, nsample, C+3)
        else:
            grouped_points = grouped_xyz_norm

        grouped_points = grouped_points.permute(0, 3, 2, 1)  # (B, C+3, nsample, S)
        grouped_points = self._shared_mlp(grouped_points)
        # Max-pool over the neighbours of each centre.
        new_points = torch.max(grouped_points, 2)[0].transpose(1, 2)  # (B, S, mlp[-1])
        return new_xyz, new_points

In [33]:
import numpy.lib.format as npfmt

# Pass 1: validate every per-area array and count its rows, so that the final
# shape is known before anything is written. The arrays are memory-mapped, so
# their data is not loaded here.
total_points = 0
for f_path in area_files:
    arr = np.load(f_path, mmap_mode='r')
    if arr.shape[1] != 9:
        raise ValueError(f"–§–∞–π–ª {f_path} –∏–º–µ–µ—Ç {arr.shape[1]} –∫–æ–ª–æ–Ω–æ–∫, –æ–∂–∏–¥–∞–µ—Ç—Å—è 9.")
    total_points += arr.shape[0]
    del arr

print(f"–û–±—â–µ–µ —á–∏—Å–ª–æ —Ç–æ—á–µ–∫: {total_points:,}")

NPY_PATH = os.path.join(OUTPUT_DIR, "s3dis_full.npy")
with open(NPY_PATH, "wb") as f_out:
    header_dict = {
        'descr': '<f4',           # float32, little-endian
        'fortran_order': False,
        'shape': (total_points, 9)
    }
    # Let NumPy emit the version-1.0 header (magic string, version bytes,
    # uint16 header length, padded dict, trailing newline). The previous
    # hand-assembled header wrote two extra zero bytes after the version, so
    # the length field was read as 0, and it appended a second newline after
    # the padding - the resulting file could not be opened with np.load.
    npfmt.write_array_header_1_0(f_out, header_dict)

    # Pass 2: stream the rows of every area right after the header.
    written = 0
    for f_path in area_files:
        print(f"–î–æ–±–∞–≤–ª—è—é {os.path.basename(f_path)}...")
        arr = np.load(f_path, mmap_mode='r')
        if arr.dtype != np.float32:
            # The header promises '<f4', so convert anything else before writing.
            arr = arr.astype(np.float32)
        arr.tofile(f_out)
        written += arr.shape[0]
        del arr

print(f"–ò—Ç–æ–≥–æ–≤—ã–π —Ñ–∞–π–ª —Å–æ–∑–¥–∞–Ω: {NPY_PATH}")
print(f"   –†–∞–∑–º–µ—Ä: {os.path.getsize(NPY_PATH) / 1e9:.2f} GB")
print(f"   –ó–∞–ø–∏—Å–∞–Ω–æ —Ç–æ—á–µ–∫: {written:,} (–æ–∂–∏–¥–∞–ª–æ—Å—å: {total_points:,})")
assert written == total_points, "–û—à–∏–±–∫–∞: –Ω–µ—Å–æ–≤–ø–∞–¥–µ–Ω–∏–µ —á–∏—Å–ª–∞ –∑–∞–ø–∏—Å–∞–Ω–Ω—ã—Ö —Ç–æ—á–µ–∫"

–û–±—â–µ–µ —á–∏—Å–ª–æ —Ç–æ—á–µ–∫: 0
–ò—Ç–æ–≥–æ–≤—ã–π —Ñ–∞–π–ª —Å–æ–∑–¥–∞–Ω: ./output\s3dis_full.npy
   –†–∞–∑–º–µ—Ä: 0.00 GB
   –ó–∞–ø–∏—Å–∞–Ω–æ —Ç–æ—á–µ–∫: 0 (–æ–∂–∏–¥–∞–ª–æ—Å—å: 0)


In [None]:
import torch.nn.functional as F

class PointNetFeaturePropagation(nn.Module):
    """PointNet++ feature-propagation layer.

    Interpolates features from a sparse point set onto a denser one with
    inverse-distance weights over the nearest neighbours, optionally
    concatenates the dense set's own features, and applies a shared MLP.

    Args:
        in_channel: number of channels entering the first 1x1 convolution
            (C1 + C2 when points1 is given, otherwise C2).
        mlp: output channel sizes of the successive 1x1 convolutions.
    """

    def __init__(self, in_channel, mlp):
        super().__init__()
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm1d(out_channel))
            last_channel = out_channel

    def forward(self, xyz1, xyz2, points1, points2):
        """
        xyz1: (B, N, 3) - target points (denser)
        xyz2: (B, M, 3) - source points (sparser)
        points1: (B, N, C1) - features of xyz1 (may be None)
        points2: (B, M, C2) - features of xyz2
        Returns: (B, N, mlp[-1])
        """
        B, N, _ = xyz1.shape
        _, M, _ = xyz2.shape

        if M == 1:
            # A single source point: broadcast its feature to every target point.
            interpolated_points = points2.repeat(1, N, 1)
        else:
            # Use up to 3 neighbours; with M == 2 only two exist, which used to
            # break the hard-coded reshape to (B, N, 3, 1).
            k = min(3, M)
            # The expanded-form squared distance can come out slightly negative
            # through rounding; clamp so the inverse-distance weights stay
            # positive and finite.
            dists = square_distance(xyz1, xyz2).clamp(min=0)
            dists, idx = dists.sort(dim=-1)
            dists, idx = dists[:, :, :k], idx[:, :, :k]  # k nearest neighbours

            # Inverse-distance weighted interpolation.
            dist_recip = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_points = torch.sum(index_points(points2, idx) * weight.unsqueeze(-1), dim=2)

        if points1 is not None:
            new_points = torch.cat([points1, interpolated_points], dim=-1)  # (B, N, C1 + C2)
        else:
            new_points = interpolated_points

        new_points = new_points.permute(0, 2, 1)  # (B, C, N)
        for conv, bn in zip(self.mlp_convs, self.mlp_bns):
            new_points = F.relu(bn(conv(new_points)))
        new_points = new_points.permute(0, 2, 1)  # (B, N, mlp[-1])
        return new_points

–ó–∞–≥—Ä—É–∑–∫–∞ 2 –∫–æ–º–Ω–∞—Ç –∏–∑ C:/Users/anast/ml/CVin3D/lab4/task2/Stanford3dDataset\Area_1...
‚úÖ –ó–∞–≥—Ä—É–∂–µ–Ω–æ 62 –±–ª–æ–∫–æ–≤.
–†–∞–∑–º–µ—Ä –±–ª–æ–∫–∞: torch.Size([4096, 6])


In [None]:
# Hyperparameters
NUM_CLASSES = 13
NUM_POINTS = 4096
BATCH_SIZE = 8
EPOCHS = 50
LR = 0.001
WEIGHT_DECAY = 1e-4
# DataLoader worker processes must be able to re-import the Dataset class.
# S3DISDataset is defined inside the notebook, so on platforms whose workers are
# started with "spawn" (Windows, macOS) the workers cannot unpickle it and the
# loader fails or hangs. Keep multi-process loading only where "fork" is used.
NUM_WORKERS = 2 if sys.platform.startswith("linux") else 0

# Initialization
model = PointNet2Seg(num_classes=NUM_CLASSES, in_channels=6).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = StepLR(optimizer, step_size=20, gamma=0.5)  # halve the LR every 20 epochs

# Datasets and loaders
train_dataset = S3DISDataset(split="train", num_points=NUM_POINTS)
val_dataset = S3DISDataset(split="val", num_points=NUM_POINTS)
test_dataset = S3DISDataset(split="test", num_points=NUM_POINTS)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Per-epoch history
train_losses = []
val_losses = []
val_oas = []
val_mious = []

In [None]:
def calculate_iou_per_class(conf_matrix):
    """Per-class intersection-over-union from a square confusion matrix.

    Args:
        conf_matrix: (C, C) array, rows = true class, columns = predicted class.
    Returns:
        (C,) float array with IoU = TP / (TP + FP + FN) per class. Classes that
        occur neither in the labels nor in the predictions (empty union) are
        NaN rather than 0, so that an `np.nanmean` over the result averages
        only the classes that are actually present.
    """
    conf = np.asarray(conf_matrix, dtype=np.float64)
    tp = np.diag(conf)
    fp = conf.sum(axis=0) - tp   # predicted as the class but labelled otherwise
    fn = conf.sum(axis=1) - tp   # labelled as the class but predicted otherwise
    denom = tp + fp + fn
    iou = np.full(denom.shape, np.nan)
    np.divide(tp, denom, out=iou, where=denom > 0)
    return iou

def evaluate_model(model, loader, num_classes=13, device=device):
    """Evaluate a segmentation model over a data loader.

    Puts the model in eval mode, predicts a class per point and compares it
    with the labels.

    Args:
        model: network called as ``model(points)``.
            NOTE(review): assumed to return class scores on dim 1, i.e.
            (B, C, N), as required by ``argmax(dim=1)`` below - confirm against
            the model definition.
        loader: iterable of ``(points, labels)`` batches.
        num_classes: number of classes in the confusion matrix.
        device: device the batches are moved to.
    Returns:
        (oa, mIoU, iou_per_class): overall accuracy, nan-aware mean IoU and the
        per-class IoU array. If the loader yields no points at all, both
        scalars are NaN and the array is filled with NaN.
    """
    model.eval()
    total_correct = 0
    total_points = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for points, labels in tqdm(loader, desc="Evaluating", leave=False):
            points, labels = points.to(device), labels.to(device)  # (B, N, 6), (B, N)
            logits = model(points)  # (B, C, N)
            preds = torch.argmax(logits, dim=1)  # (B, N)

            total_correct += (preds == labels).sum().item()
            total_points += labels.numel()

            all_preds.append(preds.cpu().numpy().flatten())
            all_labels.append(labels.cpu().numpy().flatten())

    if total_points == 0:
        # Empty split (e.g. too few files for a validation part): report NaN
        # instead of failing on a division by zero / empty concatenation.
        return float("nan"), float("nan"), np.full(num_classes, np.nan)

    oa = total_correct / total_points
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    conf_mat = confusion_matrix(all_labels, all_preds, labels=np.arange(num_classes))
    iou_per_class = calculate_iou_per_class(conf_mat)
    mIoU = np.nanmean(iou_per_class)

    return oa, mIoU, iou_per_class

‚úÖ –§–æ—Ä–º–∞ –≤—ã—Ö–æ–¥–∞: torch.Size([2, 4096, 13])


In [None]:
print("Starting training...")

for epoch in range(1, EPOCHS + 1):
    # ---- Training pass ----
    model.train()
    epoch_loss = 0.0

    for points, labels in tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}", leave=False):
        optimizer.zero_grad()
        points, labels = points.to(device), labels.to(device)
        logits = model(points)  # (B, C, N)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # An empty loader has no batches to average over; report NaN instead of
    # raising ZeroDivisionError.
    n_train_batches = len(train_loader)
    avg_train_loss = epoch_loss / n_train_batches if n_train_batches > 0 else float("nan")
    train_losses.append(avg_train_loss)

    # ---- Validation loss ----
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for points, labels in val_loader:
            points, labels = points.to(device), labels.to(device)
            logits = model(points)
            loss = criterion(logits, labels)
            val_loss += loss.item()
    n_val_batches = len(val_loader)
    avg_val_loss = val_loss / n_val_batches if n_val_batches > 0 else float("nan")
    val_losses.append(avg_val_loss)

    # ---- Validation metrics (a second pass over val_loader) ----
    oa, mIoU, _ = evaluate_model(model, val_loader, NUM_CLASSES)
    val_oas.append(oa)
    val_mious.append(mIoU)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Epoch {epoch:2d} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val OA: {oa:.4f} | Val mIoU: {mIoU:.4f}")

print("Training completed.")

‚úÖ –ó–∞–ø—É—Å–∫ –æ–±—É—á–µ–Ω–∏—è (–±–µ–∑ .view ‚Äî —Ç–æ–ª—å–∫–æ .reshape)...
Epoch 1 | Avg Loss (3 steps): 2.4620
Epoch 2 | Avg Loss (3 steps): 1.9567
üî• –£–°–ü–ï–•! –ú–æ–¥–µ–ª—å –æ–±—É—á–∞–µ—Ç—Å—è –Ω–∞ CPU.


In [None]:
import pandas as pd

# Class names in S3DIS label order; position i names label i.
CLASS_NAMES = [
    "ceiling", "floor", "wall", "beam", "column", "window", "door",
    "table", "chair", "sofa", "bookcase", "board", "clutter"
]

# Final metrics on the held-out test split.
test_oa, test_mIoU, test_iou_per_class = evaluate_model(model, test_loader, NUM_CLASSES)
print(f"Test Overall Accuracy: {test_oa:.4f}")
print(f"Test mIoU: {test_mIoU:.4f}")

# Per-class IoU as a two-column table.
iou_df = {"Class": CLASS_NAMES, "IoU": test_iou_per_class}
df = pd.DataFrame(iou_df)
print("\nIoU per class:")
print(df.to_string(index=False, float_format="{:.4f}".format))