In [1]:
import torch
import numpy as np
import  random
import sys
import os
def get_random_seed(seed):
    """Seed every random number generator this notebook relies on.

    Despite the name, nothing is returned: the function seeds Python's
    ``random`` module, NumPy's global RNG and PyTorch (CPU and the current
    CUDA device), exports ``PYTHONHASHSEED`` and switches cuDNN to its
    deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All four seeders share the same "take one int" calling convention.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
# Fix all RNG seeds before anything stochastic runs.
get_random_seed(20230226)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(DEVICE)

cuda


In [2]:
current_dir = os.getcwd()
# The `models` package is imported from inside saved_models/, so that directory
# has to be on sys.path before the import below.
sys.path.append(os.path.join(current_dir, 'saved_models'))

from models.DLCL import DLCL
model = DLCL()
model_path = os.path.join(current_dir, 'saved_models', 'model.pth')
# map_location remaps the stored tensors onto the device selected earlier, so a
# checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load(model_path, map_location=DEVICE))

<All keys matched successfully>

In [3]:
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset
# Residue alphabet used for integer encoding: a residue's code is its position
# in this string. 'X' sits at index 0, the same value PadEncode uses as padding.
amino_acids = 'XACDEFGHIKLMNPQRSTVWY'

def getSequenceData(direction: str):
    """Read labelled sequences from a FASTA-like text file.

    Lines that start with ``>`` carry a label written as a string of digits
    (one digit per class); each digit becomes one integer of the label vector.
    Every other non-blank line is taken as a sequence. All lines are stripped
    and upper-cased before use; blank lines are ignored.

    Args:
        direction: Path of the file to read.

    Returns:
        A 4-tuple ``(data, label, max_length, min_length)`` where ``data`` is a
        NumPy array of sequence strings, ``label`` is a NumPy array of integer
        label vectors, and ``max_length`` / ``min_length`` are the longest and
        shortest sequence lengths seen. For a file without sequences the
        lengths fall back to ``0`` and ``8000`` (the historical sentinels).

    Raises:
        ValueError: If a label line contains a non-digit character.
    """
    data, label, lengths = [], [], []

    with open(direction) as f:
        for each in f:
            each = each.strip().upper()
            if not each:
                # A blank line has no first character to inspect.
                continue
            if each[0] == '>':
                # Convert the digit string into a numeric label vector.
                label.append(np.array(list(each[1:]), dtype=int))
            else:
                lengths.append(len(each))
                data.append(each)

    # Track both extremes independently: a sequence that sets a new maximum
    # must still be allowed to be the minimum (e.g. the very first sequence).
    max_length = max(lengths, default=0)
    min_length = min(lengths, default=8000)

    return np.array(data), np.array(label), max_length, min_length

def PadEncode(data, label, max_len: int = 50):
    """Integer-encode sequences and right-pad them to a fixed length.

    Each residue is replaced by its position in ``amino_acids``; the encoded
    sequence is then padded with zeros up to ``max_len``. A sequence is dropped
    (together with its label) when it is longer than ``max_len`` or contains a
    character that is not in ``amino_acids``. Each sequence is judged on its
    own, independently of the sequences before it.

    Args:
        data: Indexable collection of sequence strings.
        label: Indexable collection of label vectors, aligned with ``data``.
        max_len: Length every encoded sequence is padded to.

    Returns:
        ``(encoded, labels)`` as two ``torch.LongTensor`` objects holding only
        the sequences that were kept, in their original order.
    """
    # One dictionary lookup per residue instead of a linear str.index() scan.
    index_of = {residue: idx for idx, residue in enumerate(amino_acids)}

    data_e, label_e = [], []
    for i in range(len(data)):
        st = data[i].strip()
        # Measure the stripped string so the padding width always matches the
        # number of residues that are actually encoded.
        length = len(st)
        if length > max_len:
            continue
        if any(residue not in index_of for residue in st):
            continue

        element = [index_of[residue] for residue in st]
        element += [0] * (max_len - length)
        data_e.append(element)
        label_e.append(label[i])

    return torch.LongTensor(np.array(data_e)), torch.LongTensor(np.array(label_e))
def LabelEmbeddingData(x_train, y_train):
    """Attach an all-ones label-input tensor to a batch of samples.

    Args:
        x_train: Encoded inputs; returned unchanged.
        y_train: Targets; only ``y_train.shape[0]`` (the sample count) is read,
            and the object itself is returned unchanged.

    Returns:
        ``(x_train, y_train, label_input)`` where ``label_input`` is a
        ``torch.LongTensor`` of ones with shape ``(n_samples, 21)``.
    """
    n_samples = y_train.shape[0]
    # NOTE(review): 21 is hard-coded - presumably the width the model expects
    # for its label input; confirm against the model definition.
    ones = np.ones((n_samples, 21))
    label_input = torch.LongTensor(ones)
    return x_train, y_train, label_input

def data_load(train_direction=None, test_direction=None, batch=None, subtest=True, CV=False):
    """Build the training and test ``DataLoader`` objects from two sequence files.

    Both files are read with ``getSequenceData``, encoded with ``PadEncode``
    (each split is padded to its own longest sequence) and combined with the
    all-ones label input from ``LabelEmbeddingData``.

    Args:
        train_direction: Path of the training file.
        test_direction: Path of the test file.
        batch: Batch size handed to both loaders.
        subtest: Accepted for compatibility; not used by this function.
        CV: Accepted for compatibility; not used by this function.

    Returns:
        ``(dataset_train, dataset_test, dataset_subtest, weight)``: two
        one-element lists holding the training and test loaders, followed by
        two placeholders that are always ``None``.
    """
    dataset_train, dataset_test = [], []
    dataset_subtest = None
    weight = None
    # Load data
    train_seq_data, train_seq_label, max_len_train, min_len_train = getSequenceData(train_direction)
    test_seq_data, test_seq_label, max_len_test, min_len_test = getSequenceData(test_direction)
    print(f"max_length_train:{max_len_train}")
    print(f"min_length_train:{min_len_train}")
    print(f"max_length_test:{max_len_test}")
    print(f"min_length_test:{min_len_test}")
    # NOTE(review): train and test are padded to different widths whenever their
    # longest sequences differ - confirm the model accepts both.
    x_train, y_train = PadEncode(train_seq_data, train_seq_label, max_len_train)
    x_train, y_train, label_input = LabelEmbeddingData(x_train, y_train)
    x_test, y_test = PadEncode(test_seq_data, test_seq_label, max_len_test)
    x_test, y_test, testlabel_input = LabelEmbeddingData(x_test, y_test)
    # Create datasets
    train_data = TensorDataset(x_train, y_train, label_input)
    test_data = TensorDataset(x_test, y_test, testlabel_input)
    dataset_train.append(DataLoader(train_data, batch_size=batch, shuffle=True))
    # Evaluation data keeps file order: shuffling it gains nothing and makes two
    # passes over the loader return samples in different orders.
    dataset_test.append(DataLoader(test_data, batch_size=batch, shuffle=False))
    return dataset_train, dataset_test, dataset_subtest, weight

# Build the loaders once; `subtests` and `weight` come back as placeholders.
train_datasets, test_datasets, subtests, weight = data_load(
    train_direction='dataset/train.txt',
    test_direction='dataset/test.txt',
    batch=256,
    subtest=False,
    CV=False,
)

max_length_train:50
min_length_train:4
max_length_test:50
min_length_test:5


In [4]:
def predict(model, data, device="cuda"):
    """Run the model over a loader and collect sigmoid scores with their labels.

    The model is moved to ``device`` and put in eval mode, the RNG seeds are
    reset, and every batch is scored with gradients disabled. The model is
    called as ``model(inputs, label_input)`` and must return four values; only
    the first is used and passed through a sigmoid.

    Args:
        model: Module to evaluate.
        data: Iterable yielding ``(inputs, labels, label_input)`` batches.
        device: Device the model and each batch are moved to.

    Returns:
        ``(predictions, labels)`` as two NumPy arrays built from the per-batch
        scores and labels, in iteration order.
    """
    model.to(device)
    model.eval()

    all_scores, all_targets = [], []
    with torch.no_grad():
        get_random_seed(20230226)
        for batch_inputs, batch_targets, batch_label_input in data:
            batch_inputs = batch_inputs.to(device)
            batch_label_input = batch_label_input.to(device)
            batch_targets = batch_targets.to(device)

            outputs = model(batch_inputs, batch_label_input)
            logits, _, _, _ = outputs

            all_scores += torch.sigmoid(logits).tolist()
            all_targets += batch_targets.tolist()

    return np.array(all_scores), np.array(all_targets)

In [5]:
# Score every test loader, not just the last one. predict() returns the labels
# in the same order as its predictions, so the two arrays stay aligned even if
# a loader shuffles its samples.
prediction_chunks, label_chunks = [], []
for test_dataset in test_datasets:
    loader_predictions, loader_labels = predict(model, test_dataset, device=DEVICE)
    prediction_chunks.append(loader_predictions)
    label_chunks.append(loader_labels)

model_predictions = np.concatenate(prediction_chunks)
true_labels = np.concatenate(label_chunks)
# Kept for compatibility with code that reads the plain-list form of the labels.
test_labels = true_labels.tolist()

  return F.conv1d(input, weight, bias, self.stride,


In [None]:
import math
def AbsoluteTrue(y_hat, y):
    """Return the fraction of samples whose predicted row equals the true row.

    A sample counts only when every entry of its predicted label vector is
    equal to the corresponding entry of the true label vector.

    Args:
        y_hat: 2-D array of predicted labels, one row per sample.
        y: Array of true labels, indexed row by row like ``y_hat``.

    Returns:
        Number of exactly matching rows divided by the number of rows.
    """
    n, _ = y_hat.shape
    exact_matches = sum(list(y_hat[row]) == list(y[row]) for row in range(n))
    return exact_matches / n
def evaluate1(score_label, y, threshold=0.6):
    """Binarise prediction scores and print the exact-match ratio.

    Scores below ``threshold`` become 0 and all others become 1; the resulting
    label matrix is compared with ``y`` through ``AbsoluteTrue`` and the ratio
    is printed. Nothing is returned.

    Args:
        score_label: 2-D array of prediction scores. It is not modified, so the
            same scores can be evaluated again with a different threshold.
        y: True labels, one row per sample.
        threshold: Cut-off separating negative (0) from positive (1).
    """
    # np.where builds a fresh array, leaving the caller's scores untouched.
    y_hat = np.where(np.asarray(score_label) < threshold, 0, 1)
    absolutetrue = AbsoluteTrue(y_hat, y)
    # The ratio is printed as computed; no rounding is applied.
    print("absolutetrue is,", absolutetrue)
   
# Report the exact-match ratio at the default 0.6 decision threshold.
evaluate1(model_predictions, true_labels, threshold=0.6)

absolutetrue is, 0.64
