In [1]:
import math
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.parameter import Parameter
from torch.autograd import Variable
class GraphConvolution(nn.Module):
    """Graph-convolution layer that mixes propagated features with an initial representation.

    Args:
        in_features: Width of the incoming node features.
        out_features: Width of the produced node features.
        residual: If True, the layer input is added to the layer output.
        variant: If True, the weight is applied to the concatenation of the
            propagated features and ``h0`` (so its first dimension is
            ``2 * in_features``) rather than to their weighted sum.
    """

    def __init__(self, in_features, out_features, residual=False, variant=False):
        super().__init__()
        self.variant = variant
        # Concatenating propagated and initial features doubles the weight's input width.
        self.in_features = 2 * in_features if self.variant else in_features
        self.out_features = out_features
        self.residual = residual
        self.weight = Parameter(torch.FloatTensor(self.in_features, self.out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weight uniformly in ``[-1/sqrt(out_features), 1/sqrt(out_features)]``."""
        bound = 1. / math.sqrt(self.out_features)
        self.weight.data.uniform_(-bound, bound)

    def forward(self, input, adj, h0, lamda, alpha, l):
        """Apply the layer.

        Args:
            input: Node features entering this layer.
            adj: Matrix multiplied with ``input`` to propagate features.
            h0: Initial representation mixed back in with weight ``alpha``.
            lamda: Together with ``l`` sets ``theta = min(1, log(lamda / l + 1))``.
            alpha: Mixing weight between propagated features and ``h0``.
            l: Layer index used in the ``theta`` formula (callers pass 1-based values).

        Returns:
            ``theta * (support @ weight) + (1 - theta) * blended``, plus
            ``input`` when ``residual`` is set.
        """
        theta = min(1, math.log(lamda / l + 1))
        propagated = torch.spmm(adj, input)
        # Weighted mix of propagated and initial features; it is always the
        # untransformed term, and also the transformed one unless `variant` is set.
        blended = (1 - alpha) * propagated + alpha * h0
        support = torch.cat([propagated, h0], 1) if self.variant else blended
        output = theta * torch.mm(support, self.weight) + (1 - theta) * blended
        if self.residual:
            # Skip connection; the original author notes it speeds up convergence.
            output = output + input
        return output


class deepGCN(nn.Module):
    """Input linear layer, ``nlayers`` graph convolutions, then an output linear layer.

    Args:
        nlayers: Number of ``GraphConvolution`` layers.
        nfeat: Width of the raw node features.
        nhidden: Hidden width used by every convolution.
        nclass: Width of the final output.
        dropout: Dropout probability applied before every layer.
        lamda: Forwarded to each convolution.
        alpha: Forwarded to each convolution.
        variant: Forwarded to each convolution's constructor.
    """

    def __init__(self, nlayers, nfeat, nhidden, nclass, dropout, lamda, alpha, variant):
        super().__init__()
        # Convolutions are built before the linear layers so the order in which
        # parameters are registered and randomly initialised is unchanged.
        self.convs = nn.ModuleList(
            [GraphConvolution(nhidden, nhidden, variant=variant, residual=True)
             for _ in range(nlayers)]
        )
        self.fcs = nn.ModuleList([nn.Linear(nfeat, nhidden), nn.Linear(nhidden, nclass)])
        self.act_fn = nn.ReLU()
        self.dropout = dropout
        self.alpha = alpha
        self.lamda = lamda

    def _drop(self, tensor):
        """Apply dropout with the configured rate, active only in training mode."""
        return F.dropout(tensor, self.dropout, training=self.training)

    def forward(self, x, adj):
        """Return the output of the final linear layer for node features ``x`` and matrix ``adj``."""
        # The activated output of the first linear layer is reused by every
        # convolution as its initial representation.
        initial = self.act_fn(self.fcs[0](self._drop(x)))
        hidden = initial
        for depth, conv in enumerate(self.convs, start=1):
            hidden = self.act_fn(
                conv(self._drop(hidden), adj, initial, self.lamda, self.alpha, depth)
            )
        return self.fcs[-1](self._drop(hidden))


class GraphPPIS(nn.Module):
    """Wrapper that owns a ``deepGCN`` network and a cross-entropy criterion.

    All constructor arguments are forwarded unchanged to ``deepGCN``.
    """

    def __init__(self, nlayers, nfeat, nhidden, nclass, dropout, lamda, alpha, variant):
        super().__init__()
        self.deep_gcn = deepGCN(nlayers, nfeat, nhidden, nclass, dropout, lamda, alpha, variant)
        # CrossEntropyLoss applies log-softmax to the predictions itself and
        # takes class-index targets, so no explicit softmax/one-hot is needed.
        self.criterion = nn.CrossEntropyLoss()
        # No optimizer is created here (the original left that line commented out).

    def forward(self, x, adj):
        """Cast ``x`` to float32 and run it through the wrapped network.

        Per the original author's note: x is (seq_len, FEATURE_DIM), adj is
        (seq_len, seq_len) and the result is (seq_len, NUM_CLASSES).
        """
        return self.deep_gcn(x.float(), adj)

In [2]:
# --- Network hyperparameters ---
MAP_CUTOFF = 14
HIDDEN_DIM = 256
LAYER = 8
DROPOUT = 0.1
ALPHA = 0.7
LAMBDA = 1.5
VARIANT = True  # From GCNII

# --- Training-related settings (not consumed by the model constructor below) ---
LEARNING_RATE = 1E-3
WEIGHT_DECAY = 0
BATCH_SIZE = 1
NUM_CLASSES = 2  # [not bind, bind]

INPUT_DIM = 54

# Keyword arguments make the mapping from constants to constructor parameters explicit.
model = GraphPPIS(
    nlayers=LAYER,
    nfeat=INPUT_DIM,
    nhidden=HIDDEN_DIM,
    nclass=NUM_CLASSES,
    dropout=DROPOUT,
    lamda=LAMBDA,
    alpha=ALPHA,
    variant=VARIANT,
)

In [3]:
# Restore the saved weights, mapping every tensor onto the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load('GraphPPIS_slow.pkl', map_location='cpu'))

<All keys matched successfully>

In [4]:
model

GraphPPIS(
  (deep_gcn): deepGCN(
    (convs): ModuleList(
      (0): GraphConvolution()
      (1): GraphConvolution()
      (2): GraphConvolution()
      (3): GraphConvolution()
      (4): GraphConvolution()
      (5): GraphConvolution()
      (6): GraphConvolution()
      (7): GraphConvolution()
    )
    (fcs): ModuleList(
      (0): Linear(in_features=54, out_features=256, bias=True)
      (1): Linear(in_features=256, out_features=2, bias=True)
    )
    (act_fn): ReLU()
  )
  (criterion): CrossEntropyLoss()
)

In [5]:
from torch.utils.data import Dataset, DataLoader
import pickle
import torch.nn as nn
import torch


class Config():
    """Bundle of paths, run settings, model hyperparameters and shared layers.

    Constructing an instance prints one ``name value`` line for every attribute
    assigned up to and including ``Threashold``; attributes assigned after that
    point are not printed.
    """

    def __init__(self):
        # --- Run settings and data locations (echoed to stdout below) ---
        self.epochs = 100
        self.lr = 0.001
        self.train_data_path = '../数据集/train_335.pkl'
        self.test_data_path = '../数据集/Test_60.pkl'
        self.test_315 = '../数据集/Test_315.pkl'
        self.seed = 10
        self.split_rate = 0.2
        self.batch_size = 1
        self.save_path = '../result/'
        # Use the GPU when one is visible, otherwise fall back to the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.loss_fun = nn.CrossEntropyLoss().to(self.device)
        self.Threashold = 0.2

        # Echo everything assigned so far, in assignment order.
        for attr_name, attr_value in self.__dict__.items():
            print(attr_name, attr_value)

        self.save_txt = 'result.txt'

        # --- Graph network hyperparameters ---
        self.MAP_CUTOFF = 14
        self.HIDDEN_DIM = 256
        self.LAYER = 8
        self.DROPOUT = 0.1
        self.ALPHA = 0.7
        self.LAMBDA = 1.5
        self.VARIANT = True  # From GCNII

        self.WEIGHT_DECAY = 0
        self.BATCH_SIZE = 1
        # NOTE(review): this is 1 although the note reads "[not bind, bind]" and the
        # standalone NUM_CLASSES constant used to build the model is 2 — confirm.
        self.NUM_CLASSES = 1  # [not bind, bind]
        self.INPUT_DIM = 54

        # --- Shared layers ---
        self.negative_slope = 0.3
        # Despite the attribute name this is a plain ReLU; negative_slope is unused by it.
        self.leaky_relu = nn.ReLU()
        self.drop_layer = nn.Dropout(0.3)
        self.batch_norm = nn.BatchNorm1d(32).to(self.device)


config=Config()
def normalize(mx):
    """Scale ``mx`` on both sides by the inverse square root of its row sums.

    Returns ``D @ mx @ D`` where ``D`` is the diagonal matrix of
    ``rowsum ** -0.5``. Scaling factors that come out infinite or NaN
    (for example from a zero row sum) are replaced by 0.
    """
    row_totals = np.array(mx.sum(1))
    inv_sqrt = (row_totals ** (-0.5)).flatten()
    # Zero out both infinities and NaNs in one pass.
    inv_sqrt[~np.isfinite(inv_sqrt)] = 0
    scaling = np.diag(inv_sqrt)
    return scaling @ mx @ scaling

class Test_Data(Dataset):
    """Dataset over a pickled ``{protein_name: record}`` mapping.

    Each record is indexed with the keys ``'label'``, ``'seq_emb'``, ``'s2'``,
    ``'dssp'``, ``'hmm'`` and ``'pssm'``.

    Args:
        data_path: Path of the pickle file to read.
    """

    def __init__(self, data_path):
        # The context manager closes the file handle once the pickle is read;
        # the bare open() used previously left it open.
        # NOTE(security): pickle.load can execute arbitrary code — only open trusted files.
        with open(data_path, 'rb') as data_file:
            self.raw_data = pickle.load(data_file)
        self.protein_list = list(self.raw_data.keys())

    def __getitem__(self, index):
        """Return ``(dssp, hmm, pssm, seq_emb, structure_emb, labels)`` for one protein.

        All six tensors are moved to ``config.device``; ``structure_emb`` is
        the ``'s2'`` matrix after ``normalize``.
        """
        protein_name = self.protein_list[index]
        protein_inf = self.raw_data[protein_name]
        # Labels are parsed element by element to float and stored as float32.
        labels = torch.tensor(np.array([float(i) for i in protein_inf['label']]), requires_grad=True).float().to(
            config.device)
        seq_emb = torch.tensor(np.squeeze(protein_inf['seq_emb']), requires_grad=True).squeeze().to(config.device)
        structure_emb = torch.tensor(np.squeeze(normalize(protein_inf['s2'])),
                                     requires_grad=True).squeeze().to(config.device)

        dssp = torch.tensor(np.squeeze(protein_inf['dssp']), requires_grad=True).to(config.device)
        hmm = torch.tensor(np.squeeze(protein_inf['hmm']), requires_grad=True).to(config.device)
        pssm = torch.tensor(np.squeeze(protein_inf['pssm']), requires_grad=True).to(config.device)
        return dssp, hmm, pssm, seq_emb, structure_emb, labels

    def __len__(self):
        """Number of proteins in the pickle."""
        return len(self.protein_list)

test_data = Test_Data(data_path=config.test_315)
# Evaluation gains nothing from shuffling; keeping dataset order makes the
# prediction/label lists reproducible from run to run.
eval_data_loader = DataLoader(dataset=test_data, batch_size=config.batch_size, shuffle=False)

epochs 100
lr 0.001
train_data_path ../数据集/train_335.pkl
test_data_path ../数据集/Test_60.pkl
test_315 ../数据集/Test_315.pkl
seed 10
split_rate 0.2
batch_size 1
save_path ../result/
device cpu
loss_fun CrossEntropyLoss()
Threashold 0.2


In [6]:
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
from sklearn.metrics import matthews_corrcoef, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve

def eval(eval_data_loader, model, epoch):
    """Run ``model`` over a loader and collect positive-class probabilities and labels.

    NOTE: the name shadows the built-in ``eval``; it is kept so existing calls
    keep working.

    Args:
        eval_data_loader: Iterable yielding
            ``(dssp, hmm, pssm, seq_emb, structure_emb, labels)`` batches. The
            squeeze calls below assume one protein per batch.
        model: Module called as ``model(node_features, structure_emb)`` and
            returning one row of two scores per residue.
        epoch: Unused; kept for interface compatibility.

    Returns:
        ``(pred, label)``: ``pred`` is a flat list with the softmax probability
        of column 1 for every residue, ``label`` the matching list of floats.
    """
    model.eval()
    # Run on the device the model actually lives on. The checkpoint is loaded
    # onto the CPU, so sending inputs to config.device would fail on a CUDA machine.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(config.device)

    pred = []
    label = []
    # Inference only: skip autograd bookkeeping, even for inputs created with
    # requires_grad=True.
    with torch.no_grad():
        for dssp, hmm, pssm, seq_emb, structure_emb, labels in eval_data_loader:
            # seq_emb is part of every batch but is not an input to this model.
            structure_emb = structure_emb.squeeze().to(device).to(torch.float)
            dssp = dssp.squeeze().to(device)
            hmm = hmm.squeeze().to(device)
            pssm = pssm.squeeze().to(device)

            # Node features are the per-residue concatenation [pssm | hmm | dssp].
            node_features = torch.cat((pssm, hmm, dssp), dim=1).to(torch.float)
            logits = model(node_features, structure_emb)
            positive_prob = torch.softmax(logits, dim=1)[:, 1]

            pred += list(positive_prob.cpu().numpy())
            label += labels.reshape(-1).tolist()
    return pred, label



In [7]:
# Collect per-residue positive-class scores and labels for the whole test set.
pred, label = eval(eval_data_loader, model, epoch=0)

[0.13942364,
 0.3876789,
 0.063507825,
 0.23615764,
 0.060792625,
 0.15969332,
 0.037329253,
 0.0739717,
 0.062388223,
 0.12879464,
 0.10886415,
 0.055108417,
 0.19469222,
 0.057752706,
 0.028229969,
 0.10543079,
 0.36642578,
 0.03391277,
 0.10819287,
 0.36907944,
 0.3341548,
 0.5186997,
 0.4633626,
 0.5162193,
 0.55679697,
 0.5041631,
 0.5240414,
 0.2852365,
 0.2505785,
 0.083142385,
 0.103625216,
 0.09379938,
 0.14040156,
 0.045036487,
 0.1905177,
 0.11467462,
 0.40390736,
 0.14410408,
 0.6501423,
 0.48478803,
 0.5248542,
 0.2295889,
 0.42392522,
 0.28451908,
 0.15427381,
 0.11120022,
 0.11332603,
 0.06662278,
 0.3166683,
 0.12424417,
 0.11223361,
 0.098574415,
 0.33569202,
 0.0651864,
 0.2047081,
 0.15535411,
 0.24869001,
 0.29835194,
 0.2969261,
 0.14491779,
 0.3321119,
 0.33989367,
 0.43251947,
 0.31797087,
 0.40233436,
 0.4894945,
 0.36346823,
 0.3159883,
 0.5669781,
 0.52583396,
 0.38522,
 0.40522444,
 0.22555676,
 0.1405046,
 0.6266225,
 0.14592354,
 0.03730271,
 0.15115939,
 0

[1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0

In [8]:
# Threshold-free metrics computed from the continuous scores.
fpr, tpr, _ = roc_curve(label, pred)  # thresholds are discarded
auroc = metrics.roc_auc_score(y_true=label, y_score=pred)
auprc = average_precision_score(y_true=label, y_score=pred)

In [9]:
auroc,auprc

(0.7973225602451445, 0.42130765553807925)

In [10]:
# Binarise the scores in place: 1 when strictly above 0.18, else 0.
# NOTE: `pred` holds hard labels after this cell, so the score-based metrics
# above must be computed before it runs.
pred[:] = [1 if score > 0.18 else 0 for score in pred]

In [11]:
# Metrics on the binarised predictions (sample weights left at their default).
# Specificity is not computed; the original line for it was left commented out.
acc1 = accuracy_score(label, pred)
recall = recall_score(label, pred)
prec1 = precision_score(label, pred)
f1 = f1_score(label, pred)
mcc = matthews_corrcoef(label, pred)
acc1, recall, prec1, f1, mcc

(0.7405672651574291,
 0.6857295563869589,
 0.3140912651782217,
 0.4308405252023238,
 0.3290373905451128)

In [15]:

# for name,param_tensor in model.named_parameters():
#     print(name,param_tensor.detach().numpy())
# Map every parameter name to its underlying tensor (no autograd wrapper).
dic = {param_name: param.data for param_name, param in model.named_parameters()}


In [16]:
import pickle
# Write the parameter dict to disk. The context manager flushes and closes the
# file; the bare open() used previously left the handle open.
with open('weight.pkl', 'wb') as weight_file:
    pickle.dump(dic, weight_file)


In [110]:
# Print every state-dict key, then show the keys of the exported dict for comparison.
for state_key in model.state_dict():
    print(state_key)
dic.keys()

deep_gcn.convs.0.weight
deep_gcn.convs.1.weight
deep_gcn.convs.2.weight
deep_gcn.convs.3.weight
deep_gcn.convs.4.weight
deep_gcn.convs.5.weight
deep_gcn.convs.6.weight
deep_gcn.convs.7.weight
deep_gcn.fcs.0.weight
deep_gcn.fcs.0.bias
deep_gcn.fcs.1.weight
deep_gcn.fcs.1.bias


dict_keys(['deep_gcn.convs.0.weight', 'deep_gcn.convs.1.weight', 'deep_gcn.convs.2.weight', 'deep_gcn.convs.3.weight', 'deep_gcn.convs.4.weight', 'deep_gcn.convs.5.weight', 'deep_gcn.convs.6.weight', 'deep_gcn.convs.7.weight', 'deep_gcn.fcs.0.weight', 'deep_gcn.fcs.0.bias', 'deep_gcn.fcs.1.weight', 'deep_gcn.fcs.1.bias'])

In [4]:
import torch
import torch.nn as nn
# Scratch check: nn.Linear acts on the last dimension and keeps the leading ones.
a = torch.zeros(2, 100, 10)
n = nn.Linear(in_features=10, out_features=12)
n(a).shape

torch.Size([2, 100, 12])

In [2]:

import torch
from torch_geometric.data import Data

# Scratch example: a 3-node graph with edges 0<->1 and 1<->2.
# Edges are listed as (source, target) pairs and transposed into the 2 x E layout.
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long).t().contiguous()
# One scalar feature per node.
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(1)

data = Data(x=x, edge_index=edge_index)

data

Data(x=[3, 1], edge_index=[2, 4])