# Train

In [1]:
import os 
import torch
import pathlib

import torch.optim as optim
import torch.nn as nn
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from datachallengecode import load_data
from datachallengecode import metric
from pointnet import PointNet

from torchmetrics import MeanMetric, Accuracy, F1Score

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# Segmentation classes in label order: the position of a name in this tuple
# is the integer index assigned to it ("background" is index 0).
class2idx = {
    name: index
    for index, name in enumerate(
        (
            "background",
            "beams",
            "cabletrays",
            "civils",
            "gratings",
            "guardrails",
            "hvac",
            "ladders",
            "piping",
            "supports",
        )
    )
}

# Number of distinct classes known to the model and the metrics.
n_classes = len(class2idx)

In [3]:
# Root directory of the challenge data; every other path is derived from it.
data_path = "data"

# Label CSV, directory of .ply scans, and the CSV mapping each station to its
# range of label IDs.
ytrain_path, xtrain_path, ytrain_map__path = (
    os.path.join(data_path, entry)
    for entry in ("ytrain.csv", "xtrain", "ytrain_map_ind_station.csv")
)

In [19]:
class PlyPointsDataset(Dataset):
    """Fixed-size point clouds, with per-point labels, cut from one station scan.

    The scan ``<data_path>/xtrain/SCAN_{station_id}.ply`` is read, padded so
    that its number of points is a multiple of ``cloud_size``, normalised and
    split into consecutive chunks of ``cloud_size`` points. The labels of the
    same station are read from ``<data_path>/ytrain.csv`` using the inclusive
    ID range listed for the station in
    ``<data_path>/ytrain_map_ind_station.csv``.

    Args:
        data_path: Directory containing ``xtrain/``, ``ytrain.csv`` and
            ``ytrain_map_ind_station.csv``.
        station_id: Station whose scan and labels are loaded.
        cloud_size: Number of points in each sample.
        transform: Optional callable applied to every ``(x, y)`` sample
            returned by ``__getitem__``.

    Raises:
        ValueError: If the number of labels found for the station differs
            from the number of points in its scan.
    """

    def __init__(self, data_path, station_id=0, cloud_size=2048, transform=None):
        ytrain_path = os.path.join(data_path, "ytrain.csv")
        ytrain_map__path = os.path.join(data_path, "ytrain_map_ind_station.csv")
        ply_file = os.path.join(data_path, "xtrain", f"SCAN_{station_id}.ply")

        _, x = load_data.read_x_plyfile(ply_file)

        # The map file has no header row: station id, first label ID, last label ID.
        y_map = pd.read_csv(
            ytrain_map__path,
            header=None,
            names=["station_id", "point_id_low", "point_id_high"],
        ).set_index("station_id")
        low, high = y_map.loc[station_id]
        # Label-based slicing: both ends of low:high are included.
        y = pd.read_csv(ytrain_path).set_index("ID").loc[low:high, "class"].to_numpy()

        # Points and labels are paired by position, so a length mismatch would
        # silently attach labels to the wrong points.
        if y.shape[0] != x.shape[0]:
            raise ValueError(
                f"Station {station_id}: {x.shape[0]} points in {ply_file} "
                f"but {y.shape[0]} labels for IDs {low}..{high}"
            )

        # Identical padding for points and labels keeps them aligned.
        x = self._wrap_pad(x, cloud_size)
        y = self._wrap_pad(y, cloud_size)

        # Columns 0-2 are scaled by their joint value range, columns 3-5 and 6
        # by 255 (presumably xyz, RGB and intensity -- confirm against load_data).
        x[:, :3] = x[:, :3] / (x[:, :3].max() - x[:, :3].min())
        x[:, 3:6] = x[:, 3:6] / 255
        x[:, 6] = x[:, 6] / 255

        # x: (n_clouds, n_features, cloud_size); y: (n_clouds, cloud_size)
        self.x = x.reshape(-1, cloud_size, x.shape[-1]).transpose(0, 2, 1)
        self.y = y.reshape(-1, cloud_size)

        self.cloud_size = cloud_size
        self.station_id = station_id
        self.transform = transform

    @staticmethod
    def _wrap_pad(arr, multiple):
        """Return a copy of ``arr`` whose first-axis length is a multiple of ``multiple``.

        Missing rows are taken from the start of ``arr``, cycling through it
        as often as needed. Nothing is appended when the length already is a
        multiple of ``multiple``.
        """
        n_rows = arr.shape[0]
        n_missing = (-n_rows) % multiple
        if n_missing == 0:
            return arr.copy()
        filler = arr[np.arange(n_missing) % n_rows]
        return np.concatenate([arr, filler])

    def __len__(self):
        """Number of clouds of ``cloud_size`` points available."""
        return self.y.shape[0]

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair at ``idx``, passed through ``transform`` if set."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = (self.x[idx], self.y[idx])

        if self.transform:
            sample = self.transform(sample)

        return sample
    

def transform(sample):
    """Turn a ``(x, y)`` pair of NumPy arrays into tensors placed on ``DEVICE``.

    Args:
        sample: Two-element sequence ``(x, y)`` of NumPy arrays.

    Returns:
        Tuple ``(x, y)`` of ``torch.Tensor`` objects on ``DEVICE``.
    """
    points, labels = sample
    return (
        torch.from_numpy(points).to(DEVICE),
        torch.from_numpy(labels).to(DEVICE),
    )

In [20]:
def evaluate(net, dataloader, criterion):
    """Compute the mean loss and the F1 score of ``net`` over ``dataloader``.

    The caller is responsible for switching ``net`` to evaluation mode; this
    function only disables gradient tracking.

    Args:
        net: Model called as ``net(inputs)``. It must return a pair whose
            first element is the prediction tensor handed to ``criterion``
            and to the F1 metric.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(mean_loss, f1)`` of scalar tensors; ``f1`` is moved to the CPU.
    """
    mean = MeanMetric()
    f1 = F1Score(num_classes=n_classes, mdmc_reduce="global")

    with torch.no_grad():
        # Iterate the loader that was passed in, not a notebook-level global.
        for inputs, labels in tqdm(dataloader):
            outputs, _ = net(inputs)

            # Metric state is created on the CPU; it has to sit on the same
            # device as the tensors given to update().
            f1 = f1.to(outputs.device)

            mean.update(criterion(outputs, labels).item())
            f1.update(outputs, labels)

    return mean.compute(), f1.compute().cpu()

## PointNet++

In [21]:
# Segmentation model with one output class per entry of class2idx, placed on
# the training device.
net = PointNet(num_classes=n_classes)
net = net.to(DEVICE)

# Cross-entropy loss, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=1e-3, momentum=0.9)

In [22]:
# Station ids are parsed from the scan file names ("SCAN_<id>.ply"). Only
# files matching that pattern are considered, so stray directory entries
# (hidden files, checkpoints, ...) cannot break the integer parsing.
train_ply_ids = sorted(
    int(ply_path.stem.split("_")[-1])
    for ply_path in pathlib.Path(xtrain_path).glob("SCAN_*.ply")
)
print(f"The ids used for train are {train_ply_ids}")

The ids used for train are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 37, 38, 39, 40, 41, 42, 44, 45, 46, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66]


In [23]:
# Station 66 is the scan every evaluation is run on.
eval_dataset = PlyPointsDataset(
    data_path,
    station_id=66,
    transform=transform,
)

# Evaluation batches keep the dataset order (no shuffling) and are loaded in
# the main process.
eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=0,
)

In [None]:
# One entry per training station, stored as plain floats so the lists can be
# plotted regardless of the device the metrics were computed on.
losses = []       # mean training loss over the station's batches
eval_losses = []  # mean loss on the evaluation station
eval_f1s = []     # F1 score on the evaluation station

log_steps = 50  # print the running training loss every `log_steps` batches
mean = MeanMetric()
for ply_id in train_ply_ids[:5]:
    train_dataset = PlyPointsDataset(data_path, station_id=ply_id, transform=transform)

    train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)

    # Make sure training mode is active even if a previous run of this cell
    # was interrupted while the model was in evaluation mode.
    net.train()

    # Start a fresh average for this station; without the reset each value in
    # `losses` would also include the batches of all previous stations.
    mean.reset()

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader, 0):
        optimizer.zero_grad()

        outputs, _ = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        mean.update(batch_loss)

        running_loss += batch_loss
        if i % log_steps == log_steps - 1:
            print(f"[ply_id: {ply_id}, {i + 1:5d}] loss: {running_loss / log_steps:.3f}")
            running_loss = 0.0

    net.eval()
    eval_loss, eval_f1 = evaluate(net, eval_dataloader, criterion)
    net.train()

    # float() also works for 0-dim tensors that live on the GPU.
    eval_loss = float(eval_loss)
    eval_f1 = float(eval_f1)

    print(f"[ply_id: {eval_dataset.station_id}, eval] loss: {eval_loss:.3f} f1:  {eval_f1:.3f}")

    losses.append(float(mean.compute()))
    eval_losses.append(eval_loss)
    eval_f1s.append(eval_f1)


[ply_id: 0,    50] loss: 1.555
[ply_id: 0,   100] loss: 1.072


  0%|          | 0/141 [00:00<?, ?it/s]

[ply_id: 66, eval] loss: 1.066 f1:  0.772
[ply_id: 1,    50] loss: 1.226
[ply_id: 1,   100] loss: 1.199
[ply_id: 1,   150] loss: 1.181


  0%|          | 0/141 [00:00<?, ?it/s]

[ply_id: 66, eval] loss: 0.951 f1:  0.772
[ply_id: 2,    50] loss: 1.539
[ply_id: 2,   100] loss: 1.400
[ply_id: 2,   150] loss: 1.383
[ply_id: 2,   200] loss: 1.375


  0%|          | 0/141 [00:00<?, ?it/s]

[ply_id: 66, eval] loss: 1.293 f1:  0.435
[ply_id: 3,    50] loss: 1.443


In [None]:
# Training loss against evaluation loss; each list holds one value per
# training station.
fig, ax = plt.subplots(figsize=(9, 6))

ax.plot(losses, label="train")
ax.plot(eval_losses, label="eval")

ax.legend()
ax.set_title("Loss Plot")
plt.show()

In [None]:
# F1 score on the evaluation station, one value per training station.
fig, ax = plt.subplots(figsize=(9, 6))

ax.plot(eval_f1s, label="eval")

ax.legend()
ax.set_title("F1 Plot")
plt.show()