In [3]:
from torchdrug import datasets, transforms

# A toy protein structure dataset


class EnzymeCommissionToy(datasets.EnzymeCommission):
    """Small ("toy") variant of the EnzymeCommission dataset for quick experiments.

    Only class-level attributes are overridden here; all behaviour is
    inherited from ``datasets.EnzymeCommission``.
    """

    # Name of the preprocessed file used for this toy variant.
    processed_file = "enzyme_commission_toy.pkl.gz"
    # Cutoff values for the test split -- presumably sequence-identity
    # thresholds; TODO confirm against the parent class.
    test_cutoffs = [0.3, 0.4, 0.5, 0.7, 0.95]

    # Source archive and the MD5 digest paired with it.
    url = "https://miladeepgraphlearningproteindata.s3.us-east-2.amazonaws.com/data/EnzymeCommission.tar.gz"
    md5 = "728e0625d1eb513fa9b7626e4d3bcf4d"


# Per-sample preprocessing: cap every protein at 350 residues, then apply the
# residue-level view (presumably makes residues the node unit -- TODO confirm).
truncate_transform = transforms.TruncateProtein(max_length=350, random=False)
# Legacy alias: this variable was originally misspelled. The old name is kept
# so that any cell still referring to it keeps working.
truncuate_transform = truncate_transform
protein_view_transform = transforms.ProteinView(view='residue')
transform = transforms.Compose([truncate_transform, protein_view_transform])

# Build the toy dataset under the user's home directory; atom and bond
# features are explicitly disabled.
dataset = EnzymeCommissionToy(
    "~/protein-datasets/", transform=transform, atom_feature=None, bond_feature=None)
train_set, valid_set, test_set = dataset.split()

# Summarise the dataset and the size of each split.
print(dataset)
print("train samples: %d, valid samples: %d, test samples: %d" %
      (len(train_set), len(valid_set), len(test_set)))


18:19:08   Extracting /home/aigenintern2/protein-datasets/EnzymeCommission.tar.gz to /home/aigenintern2/protein-datasets


Loading /home/aigenintern2/protein-datasets/EnzymeCommission/enzyme_commission_toy.pkl.gz: 100%|██████████| 1151/1151 [00:04<00:00, 274.83it/s]


EnzymeCommissionToy(
  #sample: 1151
  #task: 538
)
train samples: 959, valid samples: 97, test samples: 95


In [4]:
from torchdrug import models

# GearNet encoder with three hidden layers of width 512.
# NOTE(review): num_relation=7 presumably has to match the number of edge
# types produced by the graph construction layers used with this model, and
# input_dim=21 presumably is the residue feature size -- confirm both.
gearnet = models.GearNet(
    input_dim=21,
    hidden_dims=[512] * 3,
    num_relation=7,
    batch_norm=True,
    concat_hidden=True,
    short_cut=True,
    readout="sum",
)
# Move the encoder's parameters onto the GPU.
gearnet = gearnet.to("cuda")


In [5]:
# Inspect the first training sample: show its graph summary and the number
# of relation types stored on that graph.
protein = train_set[0]
sample_graph = protein['graph']
print(sample_graph, sample_graph.num_relation, sep="\n")


Protein(num_atom=2639, num_bond=5368, num_residue=350)
tensor(4)


In [6]:
from torchdrug import tasks, core, layers
from torchdrug.layers import geometry
import torch
# Graph construction: one node layer (AlphaCarbonNode) plus three edge
# layers (spatial radius, k-nearest-neighbour, sequential), with the
# "gearnet" edge feature.
node_layers = [geometry.AlphaCarbonNode()]
edge_layers = [
    geometry.SpatialEdge(radius=10.0, min_distance=5),
    geometry.KNNEdge(k=10, min_distance=5),
    geometry.SequentialEdge(max_distance=2),
]
graph_construction_model = layers.GraphConstruction(
    node_layers=node_layers,
    edge_layers=edge_layers,
    edge_feature="gearnet",
)

# One binary classification target per entry in dataset.tasks, identified
# by its integer index.
task_indices = list(range(len(dataset.tasks)))
task = tasks.MultipleBinaryClassification(
    gearnet,
    graph_construction_model=graph_construction_model,
    num_mlp_layer=3,
    task=task_indices,
    criterion="bce",
    metric=["auprc@micro", "f1_max"],
)

# Adam over all task parameters; train for a single epoch on GPU 0 and
# report the metrics on the validation split.
optimizer = torch.optim.Adam(task.parameters(), lr=1e-4)
solver = core.Engine(
    task,
    train_set,
    valid_set,
    test_set,
    optimizer,
    gpus=[0],
    batch_size=4,
)
solver.train(num_epoch=1)
solver.evaluate("valid")

18:19:17   Preprocess training set
18:19:21   {'batch_size': 4,
 'class': 'core.Engine',
 'gpus': [0],
 'gradient_interval': 1,
 'log_interval': 100,
 'logger': 'logging',
 'num_worker': 0,
 'optimizer': {'amsgrad': False,
               'betas': (0.9, 0.999),
               'class': 'optim.Adam',
               'eps': 1e-08,
               'lr': 0.0001,
               'weight_decay': 0},
 'scheduler': None,
 'task': {'class': 'tasks.MultipleBinaryClassification',
          'criterion': 'bce',
          'graph_construction_model': {'class': 'layers.GraphConstruction',
                                       'edge_feature': 'gearnet',
                                       'edge_layers': [SpatialEdge(),
                                                       KNNEdge(),
                                                       SequentialEdge()],
                                       'node_layers': [AlphaCarbonNode()]},
          'metric': ['auprc@micro', 'f1_max'],
          'model': {'activ

{'auprc@micro': tensor(0.0636, device='cuda:0'),
 'f1_max': tensor(0.1369, device='cuda:0')}