# PyTorch: Prop3D with 3D Structures (MinkowskiEngine)

### Install prerequisites: PyTorch and MinkowskiEngine

Uncomment the commands in the cells below if you need to install these packages. For a GPU-enabled PyTorch installation, follow the instructions at https://pytorch.org/get-started/locally/

In [None]:
import os, sys

In [None]:
#sudo apt install build-essential python3-dev libopenblas-dev

In [None]:
#!{sys.executable} -m pip install --user torch ninja

In [None]:
#old_cwd = os.getcwd()
#!git clone https://github.com/NVIDIA/MinkowskiEngine.git
#os.chdir("MinkowskiEngine")
#!{sys.executable} setup.py install --blas=openblas
#os.chdir(old_cwd)

### Imports

In [None]:
from tqdm import tqdm

import torch
from torch.nn import functional as F

import MinkowskiEngine as ME
import MinkowskiEngine.MinkowskiFunctional as MF
from Prop3D.ml.datasets.DistributedDomainStructureDataset import DistributedDomainStructureDataset

# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(0)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

### Define parameters

In [None]:
# Connection settings exported through the process environment.
# NOTE(review): presumably read by the HSDS client behind the Prop3D dataset;
# the username/password are the literal string "None" -- confirm that this is
# the intended way to request anonymous access.
os.environ.update({
    "HS_ENDPOINT": "http://prop3d-hsds.pods.uvarc.io",
    "HS_USERNAME": "None",
    "HS_PASSWORD": "None",
})

# Dataset file and the CATH superfamily to load from it.
cath_file = "/CATH/Prop3D-20.h5"
cath_superfamily = "1/10/10/10"  # Use / instead of .

# Feature names handed to the dataset as `use_features`; the number of entries
# is also used as the model's input channel count.
use_features = [
    'H', 'HD', 'HS', 'C', 'A', 'N', 'NA', 'NS', 'OA', 'OS', 'F', 'MG',
    'P', 'SA', 'S', 'CL', 'CA', 'MN', 'FE', 'ZN', 'BR', 'I', 'Unk_atom',
]

# Feature names handed to the dataset as `predict_features`; the number of
# entries is also used as the model's output channel count.
predict_features = ['is_electronegative']

### Define the U-Net model

In [None]:
class UNet(ME.MinkowskiNetwork):
    """Small U-Net assembled from MinkowskiEngine sparse layers.

    The encoder is three convolution + batch-norm stages (8, 16 and 32
    channels; the second and third use stride 2). The decoder is two stride-2
    transposed convolution + batch-norm stages, each followed by a
    concatenation with the encoder output of the matching stage, and a final
    kernel-size-1 convolution that maps the 24 concatenated channels to
    ``out_nchannel``.

    Args:
        in_nchannel: Number of channels in the input tensor.
        out_nchannel: Number of channels produced by the final convolution.
        D: Dimension forwarded to the base class and to every convolution.
    """

    def __init__(self, in_nchannel, out_nchannel, D):
        super().__init__(D)

        # Encoder stages.
        self.block1 = self._conv_bn(
            ME.MinkowskiConvolution, in_nchannel, 8, 1, D)
        self.block2 = self._conv_bn(
            ME.MinkowskiConvolution, 8, 16, 2, D)
        self.block3 = self._conv_bn(
            ME.MinkowskiConvolution, 16, 32, 2, D)

        # Decoder stages. block2_tr takes 32 channels because its input is
        # block3_tr's 16 channels concatenated with block2's 16 channels.
        self.block3_tr = self._conv_bn(
            ME.MinkowskiConvolutionTranspose, 32, 16, 2, D)
        self.block2_tr = self._conv_bn(
            ME.MinkowskiConvolutionTranspose, 32, 16, 2, D)

        # Output head: 24 = block2_tr's 16 channels + block1's 8 channels.
        self.conv1_tr = ME.MinkowskiConvolution(
            in_channels=24,
            out_channels=out_nchannel,
            kernel_size=1,
            stride=1,
            dimension=D)

    @staticmethod
    def _conv_bn(conv_cls, n_in, n_out, stride, D):
        """Return ``Sequential(conv_cls(kernel_size=3, ...), batch norm)``."""
        return torch.nn.Sequential(
            conv_cls(
                in_channels=n_in,
                out_channels=n_out,
                kernel_size=3,
                stride=stride,
                dimension=D),
            ME.MinkowskiBatchNorm(n_out))

    def forward(self, x):
        """Run the encoder, then the decoder with skip concatenations.

        The tensors kept for the skip connections are the stage outputs
        before ReLU is applied.
        """
        # Encoder
        skip_s1 = self.block1(x)
        skip_s2 = self.block2(MF.relu(skip_s1))
        bottom = self.block3(MF.relu(skip_s2))

        # Decoder
        up_s2 = MF.relu(self.block3_tr(MF.relu(bottom)))
        up_s2 = ME.cat(up_s2, skip_s2)

        up_s1 = MF.relu(self.block2_tr(up_s2))
        up_s1 = ME.cat(up_s1, skip_s1)

        return self.conv1_tr(up_s1)

### Set up Prop3D datasets and dataloaders

In [None]:

# Keyword arguments shared by the training and validation datasets.
dataset_options = dict(
    use_features=use_features,
    predict_features=predict_features,
    cluster_level="S100",
)

# Keyword arguments shared by both loaders; batches are assembled with
# MinkowskiEngine's sparse collate function.
loader_options = dict(
    batch_size=128,
    collate_fn=ME.utils.batch_sparse_collate,
)

# Training split: shuffled on every pass.
dataset_train = DistributedDomainStructureDataset(
    cath_file, cath_superfamily, **dataset_options)
training_loader = torch.utils.data.DataLoader(
    dataset_train, shuffle=True, **loader_options)

# Validation split: same settings plus validation=True, iterated in order.
dataset_val = DistributedDomainStructureDataset(
    cath_file, cath_superfamily, validation=True, **dataset_options)
val_loader = torch.utils.data.DataLoader(
    dataset_val, shuffle=False, **loader_options)

### Start training

In [None]:
# One input channel per entry in use_features, one output channel per entry in
# predict_features, operating in 3 dimensions.
model = UNet(
    in_nchannel=len(use_features),
    out_nchannel=len(predict_features),
    D=3,
)
model.to(device)

In [None]:
# SGD with momentum and weight decay over all model parameters.
sgd_settings = {"lr": 1e-1, "momentum": 0.9, "weight_decay": 1e-4}
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)

# Cosine learning-rate annealing; T_max is measured in scheduler.step() calls.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100000)

In [None]:
def criterion(pred, labels, smoothing=True, eps=0.2):
    """Calculate cross entropy loss, apply label smoothing if needed.

    Args:
        pred: Logits with the class scores along dimension 1.
        labels: Integer class indices, one per row of ``pred``; flattened
            to one dimension before use.
        smoothing: When true, train against a smoothed target that puts
            ``1 - eps`` on the labelled class and spreads ``eps`` evenly over
            the remaining classes.
        eps: Total probability mass moved away from the labelled class.

    Returns:
        Scalar tensor holding the mean loss over all rows.
    """
    labels = labels.contiguous().view(-1)

    # Smoothing needs at least one *other* class to receive the eps mass.
    # With a single class the smoothed target would be computed as 0 / 0 and
    # turn the loss into NaN, so that case uses the plain loss instead.
    if smoothing and pred.size(1) > 1:
        n_class = pred.size(1)

        one_hot = torch.zeros_like(pred).scatter(1, labels.view(-1, 1), 1)
        soft_target = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = F.log_softmax(pred, dim=1)

        return -(soft_target * log_prb).sum(dim=1).mean()

    return F.cross_entropy(pred, labels, reduction="mean")

In [None]:
# Train for 30 epochs; every epoch is one pass over the training loader
# followed by one pass over the validation loader.
for epoch in range(30):
    for loader, is_train in [(training_loader, True), (val_loader, False)]:
        # Switch mode-dependent layers (the batch norms in UNet) between
        # training and evaluation behaviour.
        if is_train:
            model.train()
        else:
            model.eval()
        name = "TRAIN" if is_train else "VALIDATION"

        pbar = tqdm(loader)
        for batch in pbar:
            # Every batch is a (coordinates, features, labels) triple
            coords, feats, truth = batch

            inputs = ME.SparseTensor(
                feats.float(),
                coords.int(),
                device=device)

            # Flatten the labels to one dimension. reshape(-1) is used instead
            # of squeeze() because squeeze() would also drop the leading axis
            # when a batch holds a single point.
            # Assumes one label per point, i.e. shape (N,) or (N, 1) -- TODO confirm.
            truth = truth.long().to(device).reshape(-1)

            if is_train:
                # Zero your gradients for every batch!
                optimizer.zero_grad()

            # Only record the autograd graph while training; validation
            # batches never call backward(), so tracking them wastes memory.
            with torch.set_grad_enabled(is_train):
                # Make predictions for this batch
                outputs = model(inputs)

                # Compute the loss
                # NOTE(review): the model is built with len(predict_features)
                # output channels, which is 1 in this notebook. cross_entropy
                # then sees a one-class problem where only label 0 is valid.
                # Confirm whether two output channels or
                # F.binary_cross_entropy_with_logits was intended.
                loss = F.cross_entropy(outputs.F, truth)

            if is_train:
                loss.backward()

                # Adjust learning weights; the LR schedule advances once per
                # training batch.
                optimizer.step()
                scheduler.step()

            # Release cached, unused GPU memory between batches (kept from the
            # original loop).
            torch.cuda.empty_cache()

            # Show the loss as a plain number rather than the tensor repr.
            pbar.set_description(f"Epoch {epoch} {name} Loss {loss.item():.4f}")

