Necessary imports

- dataset: represents mutations
- model: graph neural network (GNN)


In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append("./src")


from src.make_dataset import make_dataset
from src.dataset import MutationDataset
from src.model import ProBindNN
from src.train import train
from src.visualize import comparator


from torch_geometric.loader import DataLoader
import torch
from torch import nn
from torch.optim.lr_scheduler import ExponentialLR


import copy
import os
import time
from datetime import datetime



Make the dataset if needed

In [None]:
#Uncomment next line to make dataset
#make_dataset(index_xlsx="../index.xlsx", root="../dataset")

Dataset/dataloaders

In [None]:
# Fraction of the samples assigned to the training split; the remainder is
# used for validation.
# NOTE(review): 0.1 means only 10% of the data is trained on and 90% is held
# out -- confirm this is intended and not an inverted 90/10 split.
TRAIN_FRACTION = 0.1
# Fixed seed so the random train/validation partition is identical after every
# kernel restart (an unseeded split changes from run to run).
SPLIT_SEED = 0

dataset = MutationDataset(index_xlsx="index.xlsx", root="dataset12aa")
train_size = int(len(dataset) * TRAIN_FRACTION)
# Give the validation split whatever is left so the two sizes always sum to
# len(dataset), as random_split requires.
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=True)
# Both loaders are bundled under these keys and handed to train().
loaders = {"val_loader": val_loader, "train_loader": train_loader}

In [None]:
# Report the size of each split and show one raw sample.
# The first line prints the held-out split, which this notebook uses for
# validation (val_dataset / "val_loader"), so it is labelled accordingly.
print("Length validation dataset: ", len(val_dataset))
print("Length train dataset: ", len(train_dataset))
print("Take a look at the data: ", dataset[0])

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Inspect a single sample; as the cell's last expression, its repr is displayed.
sample_index = 2
dataset[sample_index]

In [None]:

# Configuration dictionary passed to ProBindNN.
model_config = {
    "features_in": 15,
    "layers": 30,
    "gnn_features_out": 15,
    "out_dim": 1,
    "mlp_hidden_dim": [30, 30, 15, 15, 15],
}
# Build the network and move it to the selected device.
model = ProBindNN(config=model_config).to(device)

#Uncomment next line if you have  a pretrained model
#model.load_state_dict(torch.load("models/aminos_model_lal2022_09_11_15_36_21.pt"))


Optimizer, loss function, and learning-rate scheduler

In [None]:
# Training hyperparameters for this cell.
LEARNING_RATE = 1e-3
LR_DECAY_GAMMA = 0.9

# Mean-squared-error objective.
loss_fn = nn.MSELoss()
# Adam over all model parameters, with an exponentially decaying learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = ExponentialLR(optimizer, gamma=LR_DECAY_GAMMA)

In [None]:
# Number of epochs requested from train().
N_EPOCHS = 500

# Run training. train() returns two values, unpacked here as `best_model`
# and `path` (presumably the best-performing model and the location of its
# saved checkpoint -- verify against src/train.py).
best_model, path = train(
    model,
    loaders,
    optimizer,
    loss_fn,
    scheduler,
    n_epochs=N_EPOCHS,
)

In [None]:
# Change `path` to point to your own model checkpoint if available.

#model.load_state_dict(torch.load(path))

# Identifier for the saved datapoints: the current UTC time formatted as
# YYYY_MM_DD_HH_MM_SS. time.gmtime/time.strftime give the same string as the
# deprecated datetime.utcfromtimestamp(t).strftime(...).
t = time.time()
stamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.gmtime(t))

# Number of samples handed to comparator(): the whole training split.
N = len(train_dataset)

# Call comparator() with the best model, the training split, N and a
# timestamped CSV path (presumably where the predictions are written --
# verify against src/visualize.py).
comparator(best_model, train_dataset, N, "_data/predictions/{}.csv".format(stamp))

If you want to take a look at the raw predictions:

In [None]:
# Put the model in evaluation mode before running inference.
model.eval()
# Take a single batch from the training loader.
d = next(iter(train_loader))
# Move the inputs to the device selected earlier instead of hard-coding
# .cuda(), which raises on machines without a GPU. Gradients are not needed
# for inspection, so tracking is disabled.
with torch.no_grad():
    raw_predictions = model(d["mutated"].to(device), d["non_mutated"].to(device)).squeeze()
# Last expression of the cell, so the predictions are displayed.
raw_predictions

In [None]:
from graphein.protein.graphs import construct_graph
from graphein.ml.conversion import GraphFormatConvertor
from graphein.protein.edges.atomic import add_atomic_edges, add_bond_order, add_ring_status
from graphein.protein.edges.distance import add_hydrogen_bond_interactions, add_ionic_interactions, add_peptide_bonds
from graphein.protein.visualisation import plotly_protein_structure_graph
from graphein.protein.config import ProteinGraphConfig

# Edge-construction functions used when building the atom-level graph.
edge_functions = [
    add_atomic_edges,
    add_bond_order,
    add_hydrogen_bond_interactions,
    add_ionic_interactions,
    add_peptide_bonds,
]
# Settings passed to ProteinGraphConfig.
params_to_change = {
    "granularity": "atom",
    "edge_construction_functions": edge_functions,
}
config = ProteinGraphConfig(**params_to_change)

# Build the graph from the mutated-interface PDB file.
graph_mutated = construct_graph(
    config=config,
    pdb_path="dataset12aa/raw/temp/1_mutated_interface.pdb",
)

# Display options for the interactive plot.
plot_options = dict(
    colour_edges_by="kind",
    colour_nodes_by="element_symbol",
    label_node_ids=False,
    node_size_min=5,
    node_alpha=0.85,
    node_size_multiplier=1,
    plot_title="Atom-level graph. Nodes coloured by their Element",
)
p = plotly_protein_structure_graph(graph_mutated, **plot_options)
p.show()

In [1]:
import torch
# Toy 10-node undirected graph, listed as one (i, j) pair per edge with i < j.
undirected_edges = [
    (0, 1), (1, 4), (1, 5), (2, 5), (3, 6), (4, 6),
    (5, 6), (5, 7), (5, 8), (7, 8), (7, 9),
]
num_nodes = 10

# Dense symmetric adjacency matrix: a 1 at both (i, j) and (j, i) per edge.
adj = torch.zeros((num_nodes, num_nodes), dtype=torch.int64)
for i, j in undirected_edges:
    adj[i, j] = 1
    adj[j, i] = 1

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# List the (row, col) index of every non-zero entry of the dense adjacency
# matrix, then transpose so the result has two rows: row indices on top,
# column indices below.
edge_index = torch.nonzero(adj).transpose(0, 1).contiguous()
edge_index

tensor([[0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9],
        [1, 0, 4, 5, 5, 6, 1, 6, 1, 2, 6, 7, 8, 3, 4, 5, 5, 8, 9, 5, 7, 7]])