# import

In [1]:
import math
import time
import json
import torch
import random
import pickle
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm import tqdm
from functools import partial
import torch.optim as optim
from torch.utils import data
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from functools import partialmethod
from scipy.stats import truncnorm
from sklearn.metrics import roc_auc_score

# DataLoader

In [2]:
Seq_Coding={'A':[1.,0.,0.,0.],'T':[0.,1.,0.,0.],'C':[0.,0.,1.,0.],'G':[0.,0.,0.,1.],'N':[0.25,0.25,0.25,0.25]}
class NanoDataset(data.Dataset):
    def __init__(self,use_path):
        self.use_path=use_path
        self.LEN=0
        self.f_dict={}
        with open(use_path+'/use_files.txt','r') as f:
            for line in f.readlines():
                f_name=line.strip()
                self.f_dict[f_name]={'len':0,'label':1,'cont':[]}
                if f_name[-4:]=='_neg':
                    self.f_dict[f_name]['label']=0
                with open(use_path+'/'+f_name+'.index') as f2:
                    for line2 in f2.readlines():
                        items2=line2.strip().split('\t')
                        self.f_dict[f_name]['cont'].append((int(items2[1]),int(items2[2])))
                self.f_dict[f_name]['len']=len(self.f_dict[f_name]['cont'])
                self.LEN+=self.f_dict[f_name]['len']
    def __getitem__(self,index):
        R_dict={'seq_feature':[],'seq_mask':[],'nano_feature':[],'nano_mask':[],'label':0}
        for key in self.f_dict:
            if index>=self.f_dict[key]['len']:
                index-=self.f_dict[key]['len']
                continue
            label=self.f_dict[key]['label']
            R_dict['label']=label
            seekit=self.f_dict[key]['cont'][index]
            with open(self.use_path+'/'+key+'.json') as f:
                f.seek(seekit[0],0)
                json_str=f.read(seekit[1]-seekit[0])
                Ls=json_str.strip().split('\n')
                for each in json.loads(Ls[0]):
                    R_dict['seq_feature'].append(Seq_Coding[each])
                    if each=='N':
                        R_dict['seq_mask'].append(0)
                    else:
                        R_dict['seq_mask'].append(1)

                for L in Ls[1:]:
                    L_data=json.loads(L)
                    t_feature=[]
                    t_mask=[]
                    for each in L_data:
                        if each[0]<0:
                            t_feature.append([0,0,0])
                            t_mask.append(0)
                        else:
                            t_feature.append(each)
                            t_mask.append(1)
                    R_dict['nano_mask'].append(t_mask)
                    R_dict['nano_feature'].append(t_feature)
            break
        for key2 in R_dict:
            R_dict[key2]=torch.tensor(R_dict[key2])
        return R_dict
    def __len__(self):
        return self.LEN

In [31]:
m6A_Nano_set=NanoDataset('./edata/DataSet/m6A')
len(m6A_Nano_set)

7456

In [4]:
RELOAD=0
if RELOAD==1:
    m6A_Nano_set=NanoDataset('./edata/DataSet/m6A')
    train_size=int(len(m6A_Nano_set)*0.8)
    test_size=len(m6A_Nano_set)-train_size
    m6A_Nano_train_set,m6A_Nano_test_set=torch.utils.data.random_split(m6A_Nano_set,[train_size,test_size])
    with open('./edata/Save_DataSet/m6A_Nano_train_set.pkl','wb') as f:
        pickle.dump(m6A_Nano_train_set,f)
    with open('./edata/Save_DataSet/m6A_Nano_test_set.pkl','wb') as f:
        pickle.dump(m6A_Nano_test_set,f)
    m6A_Nano_train_loader=DataLoader(m6A_Nano_train_set,batch_size=5,shuffle=True)
    m6A_Nano_test_loader=DataLoader(m6A_Nano_test_set,batch_size=5,shuffle=True)

else:
    with open('./edata/Save_DataSet/m6A_Nano_train_set.pkl','rb') as f:
        m6A_Nano_train_set=pickle.load(f)
    with open('./edata/Save_DataSet/m6A_Nano_test_set.pkl','rb') as f:
        m6A_Nano_test_set=pickle.load(f)
    m6A_Nano_train_loader=DataLoader(m6A_Nano_train_set,batch_size=5,shuffle=True)
    m6A_Nano_test_loader=DataLoader(m6A_Nano_test_set,batch_size=5,shuffle=True)

In [1]:
##Extra: For box plot drawing
#Run these part only if needed

In [2]:
def for_box_plot(dataset,required_K5,name):
    plot_frame=pd.DataFrame(columns=['feature','value','position','modification'])
    L=len(dataset)
    count=0
    for el in tqdm(dataset):
        count+=1
        if count>L:
            break
        K5=''
        mid=int((len(el['seq_feature'])-1)/2)
        for each in el['seq_feature'][mid-2:mid+2+1]:
            if each[0].item()==1:
                K5+='A'
            elif each[1].item()==1:
                K5+='T'
            elif each[2].item()==1:
                K5+='C'
            elif each[3].item()==1:
                K5+='G'
        if K5!=required_K5:
            continue

        for each_feature in el['nano_feature']:
            mid=int((len(each_feature)-1)/2)
            for rel in [-2,-1,0,1,2]:
                Lis=list(each_feature[mid+rel].numpy())
                if sum(Lis)>0.01:
                    plot_frame.loc[len(plot_frame)]=['event_mean',Lis[0],rel,el['label'].item()]
                    plot_frame.loc[len(plot_frame)]=['event_stdv',Lis[1],rel,el['label'].item()]
                    plot_frame.loc[len(plot_frame)]=['event_length',Lis[2],rel,el['label'].item()]
    plot_frame.to_csv('./edata/Save_for_drawing/'+name+'_box_plot.csv',index=False,sep=',')

In [33]:
#box_plot for GGACT
sample_set,_=random_split(dataset=m6A_Nano_set,lengths=[2000,len(m6A_Nano_set)-2000])
for_box_plot(sample_set,'GGACT','m6A_GGACT')

100%|████████████████████████████████████████████████████████████████████████████| 2000/2000 [1:11:36<00:00,  2.15s/it]


In [36]:
#box_plot for GAACT
sample_set,_=random_split(dataset=m6A_Nano_set,lengths=[2000,len(m6A_Nano_set)-2000])
for_box_plot(sample_set,'GAACT','m6A_GAACT')

100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [23:23<00:00,  1.43it/s]


# Tools for Model

In [5]:
def glorot_uniform_init_(weights):
    nn.init.xavier_uniform_(weights,gain=1)
def zero_init_(weights):
    with torch.no_grad():
        weights.fill_(0.0)
def permute_final_dims(tensor,inds):
    zero_index=-1*len(inds)
    first_inds=list(range(len(tensor.shape[:zero_index])))
    return tensor.permute(first_inds+[zero_index+i for i in inds])
def flatten_final_dims(t,no_dims):
    return t.reshape(t.shape[:-no_dims]+(-1,))
def relu_init_(weights,scale=2.0):
    shape=weights.shape
    _,f=shape
    scale=scale/max(1,f)
    a=-2
    b=2
    std=math.sqrt(scale)/truncnorm.std(a=a,b=b,loc=0,scale=1)
    size=1
    for n in shape:
        size=size*n
    samples=truncnorm.rvs(a=a,b=b,loc=0,scale=std,size=size)
    samples=np.reshape(samples,shape)
    with torch.no_grad():
        weights.copy_(torch.tensor(samples,device=weights.device))

class Dropout(nn.Module):
    def __init__(self,r,batch_dim):
        super(Dropout,self).__init__()
        self.r=r
        if type(batch_dim)==int:
            batch_dim=[batch_dim]
        self.batch_dim=batch_dim
        self.dropout=nn.Dropout(r)
    def forward(self,x):
        shape=list(x.shape)
        if self.batch_dim is not None:
            for bd in self.batch_dim:
                shape[bd]=1
        mask=x.new_ones(shape)
        mask=self.dropout(mask)
        x*=mask
        return x
class DropoutRowwise(Dropout):
    __init__=partialmethod(Dropout.__init__,batch_dim=-3)
class DropoutColwise(Dropout):
    __init__=partialmethod(Dropout.__init__,batch_dim=-2)

In [8]:
class Linear(nn.Linear):
    def __init__(self,in_dim,out_dim,bias=True,init="zero"):
        super(Linear, self).__init__(in_dim, out_dim, bias=bias)
        if bias:
            with torch.no_grad():
                self.bias.fill_(0)
        with torch.no_grad():
            if init=="zero":
                zero_init_(self.weight)
            elif init=="glorot":
                glorot_uniform_init_(self.weight)
            elif init=="relu":
                relu_init_(self.weight)
            elif init=="gating":
                zero_init_(self.weight)
                if bias:
                    self.bias.fill_(1.0)
            else:
                 zero_init_(self.weight)

class LayerNorm(nn.Module):
    def __init__(self,c_in,eps=1e-5):
        super(LayerNorm, self).__init__()
        self.c_in=(c_in,)
        self.eps=eps
        self.weight=nn.Parameter(torch.ones(c_in))
        self.bias=nn.Parameter(torch.zeros(c_in))
    def forward(self,x): 
        out=nn.functional.layer_norm(x,self.c_in,self.weight,self.bias,self.eps)
        return out

In [9]:
class LinearEmbedder(nn.Module):
    def __init__(self,c_in,c_out):
        super(LinearEmbedder,self).__init__()
        self.c_in=c_in
        self.c_out=c_out
        self.linear_1=nn.Linear(c_in,c_out)
        self.relu=nn.ReLU()
        self.linear_2=nn.Linear(c_out,c_out)
    def forward(self,x):
        x=self.linear_1(x)
        x=self.relu(x)
        x=self.linear_2(x)
        return x

# Model Component

In [10]:
MAX_SEQ_LEN=50
def precompute_freqs_cis(dim,seq_len,theta=10000.0):
    freqs=1.0/(theta**(torch.arange(0,dim,2)[:(dim//2)].float()/dim))
    t=torch.arange(seq_len,device=freqs.device)
    freqs=torch.outer(t,freqs).float()
    freqs_cis=torch.polar(torch.ones_like(freqs),freqs)
    return freqs_cis

def apply_rotary_emb(q,k,freqs_cis,same=True):
    _q=q.float().reshape(*q.shape[:-1],-1,2)
    _k=k.float().reshape(*k.shape[:-1],-1,2)
    _q=torch.view_as_complex(_q)
    _k=torch.view_as_complex(_k)
    
    if same==False:
        if _k.shape[-2]%2!=0:
            q_out=torch.view_as_real(_q*freqs_cis[int((_k.shape[-2]-1)/2)].to(q.device)).flatten(-2)
        else:
            q_out=torch.view_as_real(_q*freqs_cis[_k.shape[-2]/2].to(q.device)).flatten(-2)
    else:
        q_out=torch.view_as_real(_q*freqs_cis[:_q.shape[-2]].to(q.device)).flatten(-2)
    k_out=torch.view_as_real(_k*freqs_cis[:_k.shape[-2]].to(k.device)).flatten(-2)
    return q_out.type_as(q),k_out.type_as(k)

In [11]:
class Attention(nn.Module):
    def __init__(self,c_q,c_k,c_v,c_hidden,no_heads,gating=True,use_rel_pos=False):
        super(Attention, self).__init__()
        self.c_q=c_q
        self.c_k=c_k
        self.c_v=c_v
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.gating=gating
        self.use_rel_pos=use_rel_pos

        self.linear_q=Linear(c_q,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_k=Linear(c_k,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_v=Linear(c_v,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_o=Linear(c_hidden*no_heads,c_q,init="zero")
        if self.gating:
            self.linear_g=Linear(c_q,c_hidden*no_heads,init="gating")
        self.sigmoid=nn.Sigmoid()

        self.freqs_cis=None
        if self.use_rel_pos:
            self.freqs_cis=precompute_freqs_cis(c_hidden,MAX_SEQ_LEN)

    def forward(self,q_x,kv_x,biases=None):
        if(biases is None):
            biases=[]
        q=self.linear_q(q_x)
        k=self.linear_k(kv_x)
        v=self.linear_v(kv_x)
        q=q.view(q.shape[:-1]+(self.no_heads,-1))
        k=k.view(k.shape[:-1]+(self.no_heads,-1))
        v=v.view(v.shape[:-1]+(self.no_heads,-1))

        q=q.transpose(-2,-3)#r,H,s,h
        k=k.transpose(-2,-3)
        v=v.transpose(-2,-3)
        
        if self.use_rel_pos:
            q,k=apply_rotary_emb(q,k,freqs_cis=self.freqs_cis,same=True)
        k=permute_final_dims(k,(1,0))
        a=torch.matmul(q,k)/math.sqrt(self.c_hidden)#r,H,s,h * r,H,h,s = r,H,s,s
        for b in biases:
            a+=b
        a=torch.nn.functional.softmax(a,dim=-1)
        o=torch.matmul(a,v)#r,H,s,s * r,H,s,h = r,H,s,h
        o=o.transpose(-2,-3)#r,s,H,h

        if self.gating:
            g=self.sigmoid(self.linear_g(q_x))
            g=g.view(g.shape[:-1]+(self.no_heads,-1))
            o=o*g
        o=flatten_final_dims(o,2)#r,s,H*h
        o=self.linear_o(o)#r,s,o
        return o

In [12]:
class NanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,use_rel_pos=False):
        super(NanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self.layer_norm_x=LayerNorm(c_in)
        self.mha=Attention(c_in,c_in,c_in,c_hidden,no_heads,True,use_rel_pos)

    def forward(self,x,mask=None):
        n_seq,n_pos=x.shape[-3:-1]
        if mask is None:
            mask=x.new_ones(x.shape[:-3]+(n_seq,n_pos))
        mask_bias=(self.inf*(mask-1))[...,:,None,None,:]
        biases=[mask_bias]

        x=self.layer_norm_x(x)
        x=self.mha(x,x,biases)
        return x

class Trans_NanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,use_rel_pos=False):
        super(Trans_NanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self._NanoAttention=NanoAttention(c_in,c_hidden,no_heads,inf,use_rel_pos)

    def forward(self,x,mask=None):
        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        x=self._NanoAttention(x,mask=mask)

        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        return x

In [13]:
class GlobalAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e5,eps=1e-8,use_rel_pos=False):
        super(GlobalAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.eps=eps
        self.use_rel_pos=use_rel_pos
        
        self.linear_q=Linear(c_in,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_k=Linear(c_in,c_hidden,bias=False,init="glorot")
        self.linear_v=Linear(c_in,c_hidden,bias=False,init="glorot")
        self.linear_g=Linear(c_in,c_hidden*no_heads,init="gating")
        self.linear_o=Linear(c_hidden*no_heads,c_in,init="zero")
        self.sigmoid=nn.Sigmoid()
        self.freqs_cis=None
        if self.use_rel_pos:
            self.freqs_cis=precompute_freqs_cis(c_hidden,MAX_SEQ_LEN)
    def forward(self,m,mask):
        q=torch.sum(m*mask.unsqueeze(-1),dim=-2)/(torch.sum(mask,dim=-1)[...,None]+self.eps)
        q=self.linear_q(q)
        k=self.linear_k(m)#r,s,h
        v=self.linear_v(m)#r,s,h
        q=q.view(q.shape[:-1]+(self.no_heads,-1))#r,H,h
        if self.use_rel_pos:
            q,k=apply_rotary_emb(q,k,freqs_cis=self.freqs_cis)
        
        bias=(self.inf*(mask-1))[...,:,None,:]
        a=torch.matmul(q,k.transpose(-1,-2))/math.sqrt(self.c_hidden)#r,H,h * r,h,s = r,H,s
        a+=bias
        a=torch.nn.functional.softmax(a,dim=-1)
        
        o=torch.matmul(a,v)#r,H,s * r,s,h = r,H,h
        g=self.sigmoid(self.linear_g(m))
        g=g.view(g.shape[:-1]+(self.no_heads,-1))
        o=o.unsqueeze(-3)*g#r,1,H,h * r,s,H,h = r,s,H,h
        o=o.reshape(o.shape[:-2]+(-1,))
        
        m=self.linear_o(o)#r,s,H*h->r,s,c_in
        return m

In [14]:
class GlobalNanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,eps=1e-8,use_rel_pos=False):
        super(GlobalNanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self.layer_norm_x=LayerNorm(c_in)
        self.gmha=GlobalAttention(c_in,c_hidden,no_heads,inf,eps,use_rel_pos)

    def forward(self,x,mask=None):
        n_seq,n_pos=x.shape[-3:-1]
        if mask is None:
            mask=x.new_ones(x.shape[:-3]+(n_seq,n_pos))
        x=self.layer_norm_x(x)
        x=self.gmha(x,mask)
        return x

class Trans_GlobalNanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,eps=1e-8,use_rel_pos=False):
        super(Trans_GlobalNanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self._GlobalNanoAttention=GlobalNanoAttention(c_in,c_hidden,no_heads,inf,eps,use_rel_pos)

    def forward(self,x,mask=None):
        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        x=self._GlobalNanoAttention(x,mask=mask)
        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        return x

In [15]:
class LineAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e5,eps=1e-8,use_rel_pos=False):
        super(LineAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.eps=eps
        self.use_rel_pos=use_rel_pos
        
        self.linear_q0=Linear(c_in,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_k0=Linear(c_in,c_hidden,bias=False,init="glorot")
        self.linear_v0=Linear(c_in,c_hidden,bias=False,init="glorot")
        self.linear_q1=Linear(c_hidden,c_hidden,bias=False,init="glorot")
        self.linear_k1=Linear(c_hidden,c_hidden,bias=False,init="glorot")
        self.linear_v1=Linear(c_hidden,c_hidden,bias=False,init="glorot")
        self.linear_g=Linear(c_in,c_hidden*no_heads,init="gating")
        self.linear_q2=Linear(c_in,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_k2=Linear(c_in,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_v2=Linear(c_in,c_hidden*no_heads,bias=False,init="glorot")
        self.linear_o=Linear(c_hidden*no_heads,c_in,init="zero")
        self.sigmoid=nn.Sigmoid()
        self.freqs_cis=precompute_freqs_cis(c_hidden,MAX_SEQ_LEN)
    def forward(self,m,mask):
        l_sum=torch.sum(m*mask.unsqueeze(-1),dim=-2)/(torch.sum(mask,dim=-1)[...,None]+self.eps)
        q0=self.linear_q0(l_sum)
        k0=self.linear_k0(m)#r,s,h
        v0=self.linear_v0(m)#r,s,h
        q0=q0.view(q0.shape[:-1]+(self.no_heads,-1))#r,H,h
        if self.use_rel_pos:
            q0,k0=apply_rotary_emb(q0,k0,freqs_cis=self.freqs_cis,same=False)#r,H,h;r,s,h
        bias=(self.inf*(mask-1))[...,:,None,:]
        a0=torch.matmul(q0,k0.transpose(-1,-2))/math.sqrt(self.c_hidden)#r,H,h * r,h,s = r,H,s
        a0+=bias
        a0=torch.nn.functional.softmax(a0,dim=-1)
        r0=torch.matmul(a0,v0)#r,H,s * r,s,h = r,H,h
        
        q1=self.linear_q1(r0)
        k1=self.linear_q1(r0)
        v1=self.linear_q1(r0)
        q1=q1.transpose(-2,-3)
        k1=k1.transpose(-2,-3)
        v1=v1.transpose(-2,-3)
        if not self.use_rel_pos:
            q1,k1=apply_rotary_emb(q1,k1,freqs_cis=self.freqs_cis,same=True)#H,r,h;H,r,h
        a1=torch.matmul(q1,k1.transpose(-1,-2))/math.sqrt(self.c_hidden)#H,r,h * H,h,r = H,r,r
        a1=torch.nn.functional.softmax(a1,dim=-1)
        r1=torch.matmul(a1,v1)#H,r,r * H,r,h = H,r,h
        
        q2=self.linear_q2(m)
        k2=self.linear_k2(m)
        v2=self.linear_v2(m)
        q2=q2.view(q2.shape[:-1]+(self.no_heads,-1))
        k2=k2.view(k2.shape[:-1]+(self.no_heads,-1))
        v2=v2.view(v2.shape[:-1]+(self.no_heads,-1))
        q2=q2.transpose(-2,-3)#r,H,s,h
        k2=k2.transpose(-2,-3)
        v2=v2.transpose(-2,-3)
        if self.use_rel_pos:
            q2,k2=apply_rotary_emb(q2,k2,freqs_cis=self.freqs_cis,same=True)
            
        bias2=(self.inf*(mask-1))[...,:,None,None,:]
        a2=torch.matmul(q2,k2.transpose(-1,-2))/math.sqrt(self.c_hidden)#r,H,s,h * r,H,h,s = r,H,s,s
        a2+=bias2
        a2=torch.nn.functional.softmax(a2,dim=-1)
        r2=torch.matmul(a2,v2)#r,H,s,s * r,H,s,h = r,H,s,h
        r2=r2.transpose(-3,-4)#H,r,s,h
        #
        
        g=self.sigmoid(self.linear_g(m))
        g=g.view(g.shape[:-1]+(self.no_heads,-1))
        g=g.transpose(-2,-3)
        g=g.transpose(-3,-4)

        if self.use_rel_pos:
            r1,_=apply_rotary_emb(r1,r2,freqs_cis=self.freqs_cis,same=False)#r,H,h;r,s,h
        r=(r1.unsqueeze(-2)+r2)*g#(H,r,1,h+H,r,s,h)*H,r,s,h=H,r,s,h
        r=r.transpose(-3,-4)
        r=r.transpose(-2,-3)
        r=r.reshape(r.shape[:-2]+(-1,))
        m=self.linear_o(r)#r,s,H*h->r,s,c_in
        return m

In [16]:
class LineNanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,eps=1e-8,use_rel_pos=False):
        super(LineNanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self.layer_norm_x=LayerNorm(c_in)
        self.lmha=LineAttention(c_in,c_hidden,no_heads,inf,eps,use_rel_pos)

    def forward(self,x,mask=None):
        n_seq,n_pos=x.shape[-3:-1]
        if mask is None:
            mask=x.new_ones(x.shape[:-3]+(n_seq,n_pos))
        x=self.layer_norm_x(x)
        x=self.lmha(x,mask)
        return x

class Trans_LineNanoAttention(nn.Module):
    def __init__(self,c_in,c_hidden,no_heads,inf=1e9,eps=1e-8,use_rel_pos=False):
        super(Trans_LineNanoAttention,self).__init__()
        self.c_in=c_in
        self.c_hidden=c_hidden
        self.no_heads=no_heads
        self.inf=inf
        self.use_rel_pos=use_rel_pos
        self._LineNanoAttention=LineNanoAttention(c_in,c_hidden,no_heads,inf,eps,use_rel_pos)

    def forward(self,x,mask=None):
        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        x=self._LineNanoAttention(x,mask=mask)
        x=x.transpose(-2,-3)
        if mask is not None:
            mask=mask.transpose(-1,-2)
        return x

# Build Model

In [17]:
class NanoBlock(nn.Module):
    def __init__(self,c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps):
        super(NanoBlock,self).__init__()
        self.att_row=NanoAttention(c_x,c_hidden_att,no_heads,inf,use_rel_pos=True)
        self.att_col=Trans_NanoAttention(c_x,c_hidden_att,no_heads,inf,use_rel_pos=False)
        self.row_dropout_layer=DropoutRowwise(row_dropout)
        self.col_dropout_layer=DropoutColwise(col_dropout)

        self.layer_norm=LayerNorm(c_x)
        self.linear_1=Linear(c_x,transition_n*c_x,init="relu")
        self.relu=nn.ReLU()
        self.linear_2=Linear(transition_n*c_x,c_x,init="zero")
    def _transition(self,x):
        x=self.layer_norm(x)
        x=self.linear_1(x)
        x=self.relu(x)
        x=self.linear_2(x)
        return x

    def forward(self,x,x_mask):
        x=x+self.row_dropout_layer(self.att_row(x,x_mask))
        x=x+self.col_dropout_layer(self.att_col(x,x_mask))
        x=x+self._transition(x)
        return x

In [18]:
class NanoGlobalBlock(nn.Module):
    def __init__(self,c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps):
        super(NanoGlobalBlock,self).__init__()
        self.gatt_row=GlobalNanoAttention(c_x,c_hidden_att,no_heads,inf,eps,use_rel_pos=True)
        self.gatt_col=Trans_GlobalNanoAttention(c_x,c_hidden_att,no_heads,inf,eps,use_rel_pos=False)
        self.row_dropout_layer=DropoutRowwise(row_dropout)
        self.col_dropout_layer=DropoutColwise(col_dropout)

        self.layer_norm=LayerNorm(c_x)
        self.linear_1=Linear(c_x,transition_n*c_x,init="relu")
        self.relu=nn.ReLU()
        self.linear_2=Linear(transition_n*c_x,c_x,init="zero")
    def _transition(self,x):
        x=self.layer_norm(x)
        x=self.linear_1(x)
        x=self.relu(x)
        x=self.linear_2(x)
        return x

    def forward(self,x,x_mask):
        x=x+self.row_dropout_layer(self.gatt_row(x,x_mask))
        x=x+self.col_dropout_layer(self.gatt_col(x,x_mask))
        x=x+self._transition(x)
        return x

In [19]:
class NanoLineBlock(nn.Module):
    def __init__(self,c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps):
        super(NanoLineBlock,self).__init__()
        self.latt_row=LineNanoAttention(c_x,c_hidden_att,no_heads,inf,eps,use_rel_pos=True)
        self.latt_col=Trans_LineNanoAttention(c_x,c_hidden_att,no_heads,inf,eps,use_rel_pos=False)
        self.row_dropout_layer=DropoutRowwise(row_dropout)
        self.col_dropout_layer=DropoutColwise(col_dropout)

        self.layer_norm=LayerNorm(c_x)
        self.linear_1=Linear(c_x,transition_n*c_x,init="relu")
        self.relu=nn.ReLU()
        self.linear_2=Linear(transition_n*c_x,c_x,init="zero")
    def _transition(self,x):
        x=self.layer_norm(x)
        x=self.linear_1(x)
        x=self.relu(x)
        x=self.linear_2(x)
        return x

    def forward(self,x,x_mask):
        x=x+self.row_dropout_layer(self.latt_row(x,x_mask))
        x=x+self.col_dropout_layer(self.latt_col(x,x_mask))
        x=x+self._transition(x)
        return x

In [20]:
class NanoStack(nn.Module):
    def __init__(self,c_x,c_hidden_att,no_heads,blocks_lis,
        row_dropout,col_dropout,transition_n,
        inf,eps,clear_cache_between_blocks=False):
        super(NanoStack,self).__init__()
        self.clear_cache_between_blocks=clear_cache_between_blocks
        self.blocks=nn.ModuleList()
        for block_type in blocks_lis:
            if block_type==0:
                block=NanoBlock(c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps)
            elif block_type==1:
                block=NanoGlobalBlock(c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps)
            elif block_type==2:
                block=NanoLineBlock(c_x,c_hidden_att,no_heads,row_dropout,col_dropout,transition_n,inf,eps)
            self.blocks.append(block)

    def _prep_blocks(self,x_mask):
        blocks=[partial(b,x_mask=x_mask)for b in self.blocks]
        if(self.clear_cache_between_blocks):
            def block_with_cache_clear(block,*args,**kwargs):
                torch.cuda.empty_cache()
                return block(*args,**kwargs)
            blocks=[partial(block_with_cache_clear,b) for b in blocks]
        return blocks

    def forward(self,x,x_mask):
        blocks=self._prep_blocks(x_mask)
        for block in blocks:
            x=block(x)
        return x

In [21]:
class Nano(nn.Module):
    def __init__(self,c_s,c_x,c_emb,c_f,c_hidden_att,c_o,no_heads,blocks_lis,
                row_dropout,col_dropout,transition_n,inf=1e9,eps=1e-8,clear_cache_between_blocks=False):
        super(Nano,self).__init__()
        self.s_embedder=LinearEmbedder(c_s,c_emb)
        self.x_embedder=LinearEmbedder(c_x,c_emb)
        self.stack=NanoStack(c_emb,c_hidden_att,no_heads,blocks_lis,
                     row_dropout,col_dropout,transition_n,inf,eps,clear_cache_between_blocks)
        self.linear_f=Linear(c_emb,c_f)
        self.classifier=nn.Sequential(
            nn.Linear(c_f,int(c_f/2)),
            nn.ReLU(),
            nn.Linear(int(c_f/2),c_o),
            nn.Sigmoid()
        )
    def forward(self,s,x,s_mask,x_mask):
        s=self.s_embedder(s)
        x=self.x_embedder(x)
        
        x=torch.cat([s.unsqueeze(-3),x],dim=-3)
        x_mask=torch.cat([s_mask.unsqueeze(-2),x_mask],dim=-2)

        x=self.stack(x,x_mask)
        x=self.linear_f(torch.mean(x[...,:,int(x.shape[-2]/2)+1,:],-2))
        #x=self.linear_f(torch.max(x[...,:,int(x.shape[-2]/2)+1,:],-2)[0])
        o=self.classifier(x).squeeze(-1)
        return o

# For Train and Test

In [23]:
def test(model,test_loader,device,line_reduce=0,col_reduce=0):
    model.eval()
    right_count,all_count=0,0
    prob_all,by_all=[],[]
    with torch.no_grad():
        for _,l_dic in enumerate(test_loader):
            by=l_dic['label'].to(device)
            by=by.to(torch.int64)
            if line_reduce==0:
                seq_feature=l_dic['seq_feature'].to(device)
                seq_mask=l_dic['seq_mask'].to(device)
                nano_feature=l_dic['nano_feature'][:,col_reduce:].to(device)
                nano_mask=l_dic['nano_mask'][:,col_reduce:].to(device)
            else:
                side_reduce=int(line_reduce/2)
                seq_feature=l_dic['seq_feature'][:,side_reduce:-side_reduce].to(device)
                seq_mask=l_dic['seq_mask'][:,side_reduce:-side_reduce].to(device)
                nano_feature=l_dic['nano_feature'][:,col_reduce:,side_reduce:-side_reduce].to(device)
                nano_mask=l_dic['nano_mask'][:,col_reduce:,side_reduce:-side_reduce].to(device)
            ry=model(seq_feature,nano_feature,seq_mask,nano_mask)
            out_y=ry>0.5
            right_count+=out_y.eq(by).sum()
            all_count+=len(by)
            for each in ry:
                prob_all.append(np.array(each.cpu()))
            for each in by:
                by_all.append(np.array(each.cpu()))
    roauc=roc_auc_score(by_all,prob_all)

    accuracy=100*(right_count/all_count).item()
    print('AUC:{:.4f}   accuracy:{:.4f}%'.format(roauc,accuracy))
    torch.cuda.empty_cache()

def train(model,train_loader,test_loader,device,optimizer,loss_func,epochs,line_reduce=0,col_reduce=0):
    torch.cuda.empty_cache()
    for epoch in range(epochs):
        total_loss=0
        model.train()
        for _,l_dic in enumerate(train_loader):
            by=l_dic['label'].to(device)
            if line_reduce==0:
                seq_feature=l_dic['seq_feature'].to(device)
                seq_mask=l_dic['seq_mask'].to(device)
                nano_feature=l_dic['nano_feature'][:,col_reduce:].to(device)
                nano_mask=l_dic['nano_mask'][:,col_reduce:].to(device)
            else:
                side_reduce=int(line_reduce/2)
                seq_feature=l_dic['seq_feature'][:,side_reduce:-side_reduce].to(device)
                seq_mask=l_dic['seq_mask'][:,side_reduce:-side_reduce].to(device)
                nano_feature=l_dic['nano_feature'][:,col_reduce:,side_reduce:-side_reduce].to(device)
                nano_mask=l_dic['nano_mask'][:,col_reduce:,side_reduce:-side_reduce].to(device)
            ry=model(seq_feature,nano_feature,seq_mask,nano_mask)
            loss=loss_func(ry,by.float())
            total_loss+=loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        print('epoch '+str(epoch+1)+' loss:  ',total_loss/len(test_loader))        
        if epoch%10==9:
            print('At epoch '+str(epoch+1),':')
            test(model,test_loader,device,line_reduce,col_reduce)
            torch.save(model.state_dict(),'./model/model_'+str(epoch+1)+'_'+str(int(time.time()))+'.pkl')

In [24]:
def detailed_test(model,test_loader,device,seq_reduce=0,reads_reduce=0,curve_name=None):
    model.eval()
    right_count,all_count=0,0
    more_dict={0.5:[0,0],0.6:[0,0],0.8:[0,0],0.7:[0,0],0.9:[0,0],0.95:[0,0],0.98:[0,0],\
               0.99:[0,0],0.995:[0,0],0.999:[0,0],0.9995:[0,0],0.9999:[0,0],0.99995:[0,0],\
               0.99999:[0,0],0.999995:[0,0],0.999999:[0,0]}
    prob_all,by_all=[],[]
    motif_dict={}
    with torch.no_grad():
        for _,l_dic in enumerate(test_loader):
            by=l_dic['label'].to(device).to(torch.int64)
            if seq_reduce==0:
                seq_feature=l_dic['seq_feature'].to(device)
                seq_mask=l_dic['seq_mask'].to(device)
                nano_feature=l_dic['nano_feature'][:,reads_reduce:].to(device)
                nano_mask=l_dic['nano_mask'][:,reads_reduce:].to(device)
            else:
                side_reduce=int(seq_reduce/2)
                seq_feature=l_dic['seq_feature'][:,side_reduce:-side_reduce].to(device)
                seq_mask=l_dic['seq_mask'][:,side_reduce:-side_reduce].to(device)
                nano_feature=l_dic['nano_feature'][:,reads_reduce:,side_reduce:-side_reduce].to(device)
                nano_mask=l_dic['nano_mask'][:,reads_reduce:,side_reduce:-side_reduce].to(device)
            ry=model(seq_feature,nano_feature,seq_mask,nano_mask)
            out_y=ry>0.5
            right_count+=out_y.eq(by).sum()
            all_count+=len(by)
            for each in ry:
                prob_all.append(np.array(each.cpu()))
            for each in by:
                by_all.append(np.array(each.cpu()))
            for key in more_dict:
                more_dict[key][0]+=((ry>key)&by).sum()
                more_dict[key][1]+=(ry>key).sum()
    if curve_name:
        save_frame=pd.DataFrame({'label':by_all,'pred':prob_all})
        save_frame.to_csv('./edata/Save_for_drawing/'+curve_name+'_curve.csv',index=False,sep=',')

    print('Im total',all_count,'samples:')
    auc=roc_auc_score(by_all,prob_all)
    accuracy=100*(right_count/all_count).item()
    print('AUC:{:.4f}   accuracy:{:.4f}%'.format(auc,accuracy))
    for key in more_dict:
        if more_dict[key][1]>0:
            print('Precision when positive threshold at {:g} is :{:.4f}% (total:{:d})'.format(key,more_dict[key][0]/more_dict[key][1],more_dict[key][1]))
    torch.cuda.empty_cache()

# Use Same information of m6ANet 

In [26]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=22
reads_reduce=30
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7303, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.7083, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6999, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.6979, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.6902, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.6910, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.6872, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.6843, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.6741, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.6628, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.6560   accuracy:59.9866%
epoch 11 loss:   tensor(2.6092, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.4919, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.4009, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(1.4806, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(1.4382, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(1.4688, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.8429   accuracy:76.7426%
epoch 111 loss:   tensor(1.4492, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(1.4068, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(1.4416, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(1.4074, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(1.4947, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(1.4136, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(1.3723, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(1.3775, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(1.3695, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(1.3304, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.4759, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.4566, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.4505, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.4462, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.4705, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.4472, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.4036, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.4767, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.4274, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.8074   accuracy:73.6595%
epoch 221 loss:   tensor(0.4763, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.3815, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.4153, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.4108, device='cuda:0', grad_fn=<DivBack

In [53]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=22
reads_reduce=30
#model.load_state_dict(torch.load('./model/m6A_NSWord_20reads_3sites_keep_model_60_220011.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_20reads_3sites_keep_model_70_220011.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],20reads_3sites')

Im total 1492 samples:
AUC:0.8498   accuracy:76.6086%
Precision when positive threshold at 0.5 is :0.7337% (total:796)
Precision when positive threshold at 0.6 is :0.7706% (total:693)
Precision when positive threshold at 0.8 is :0.8586% (total:382)
Precision when positive threshold at 0.7 is :0.8219% (total:539)
Precision when positive threshold at 0.9 is :0.9037% (total:187)
Precision when positive threshold at 0.95 is :0.9444% (total:90)
Precision when positive threshold at 0.98 is :1.0000% (total:45)
Precision when positive threshold at 0.99 is :1.0000% (total:34)
Precision when positive threshold at 0.995 is :1.0000% (total:28)
Precision when positive threshold at 0.999 is :1.0000% (total:14)
Precision when positive threshold at 0.9995 is :1.0000% (total:11)
Precision when positive threshold at 0.9999 is :1.0000% (total:4)
Precision when positive threshold at 0.99995 is :1.0000% (total:2)
Precision when positive threshold at 0.99999 is :1.0000% (total:1)
Precision when positive thr

# Different reads

In [62]:
#35reads_25sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=0
reads_reduce=15
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7748, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.7726, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.7710, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.7697, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.7687, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.7331, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.7014, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.6842, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.6478, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.5981, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.6924   accuracy:60.0536%
epoch 11 loss:   tensor(2.5698, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.5497, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.5232, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.1040, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.1396, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.1159, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.9163   accuracy:84.8526%
epoch 111 loss:   tensor(0.0798, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.1053, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.1451, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.1026, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.1027, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.0841, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.1055, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.1174, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.0452, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.1687, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.1261, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.0307, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.0890, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.0635, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.0571, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.0591, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.0300, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.0640, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0716, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.9372   accuracy:86.6622%
epoch 221 loss:   tensor(0.0170, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.0669, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.0744, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0430, device='cuda:0', grad_fn=<DivBack

In [68]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=0
reads_reduce=15
#model.load_state_dict(torch.load('./model/m6A_NSWord_35reads_25sites_keep_model_190_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_35reads_25sites_keep_model_210_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],35reads_25sites')

Im total 1492 samples:
AUC:0.9432   accuracy:86.0590%
Precision when positive threshold at 0.5 is :0.8171% (total:809)
Precision when positive threshold at 0.6 is :0.8254% (total:796)
Precision when positive threshold at 0.8 is :0.8455% (total:770)
Precision when positive threshold at 0.7 is :0.8365% (total:783)
Precision when positive threshold at 0.9 is :0.8581% (total:747)
Precision when positive threshold at 0.95 is :0.8700% (total:731)
Precision when positive threshold at 0.98 is :0.8838% (total:714)
Precision when positive threshold at 0.99 is :0.8911% (total:698)
Precision when positive threshold at 0.995 is :0.8964% (total:685)
Precision when positive threshold at 0.999 is :0.9102% (total:646)
Precision when positive threshold at 0.9995 is :0.9140% (total:628)
Precision when positive threshold at 0.9999 is :0.9291% (total:564)
Precision when positive threshold at 0.99995 is :0.9315% (total:540)
Precision when positive threshold at 0.99999 is :0.9430% (total:474)
Precision when 

In [37]:
#20reads_25sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=0
reads_reduce=30
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7624, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.7084, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6911, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.6773, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.6620, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.6301, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.5984, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.5532, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.5133, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.4507, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.7479   accuracy:66.0858%
epoch 11 loss:   tensor(2.3903, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.3412, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.2803, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.1147, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.1174, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.1233, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.9065   accuracy:82.9759%
epoch 111 loss:   tensor(0.1215, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.0908, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.1287, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.0995, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.1283, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.1010, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.1442, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.0597, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.1403, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.0723, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.0697, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.0466, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.0795, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.0670, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.1106, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.0625, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.0712, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.0399, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0747, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.9239   accuracy:84.9866%
epoch 221 loss:   tensor(0.0336, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.0605, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.0286, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0782, device='cuda:0', grad_fn=<DivBack

In [47]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=0
reads_reduce=30
#model.load_state_dict(torch.load('./model/m6A_NSWord_20reads_25sites_keep_model_280_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_20reads_25sites_keep_model_230_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],20reads_25sites')

Im total 1492 samples:
AUC:0.9349   accuracy:85.9920%
Precision when positive threshold at 0.5 is :0.8299% (total:776)
Precision when positive threshold at 0.6 is :0.8375% (total:763)
Precision when positive threshold at 0.8 is :0.8566% (total:732)
Precision when positive threshold at 0.7 is :0.8438% (total:749)
Precision when positive threshold at 0.9 is :0.8697% (total:706)
Precision when positive threshold at 0.95 is :0.8752% (total:689)
Precision when positive threshold at 0.98 is :0.8887% (total:656)
Precision when positive threshold at 0.99 is :0.8974% (total:643)
Precision when positive threshold at 0.995 is :0.9022% (total:624)
Precision when positive threshold at 0.999 is :0.9198% (total:586)
Precision when positive threshold at 0.9995 is :0.9321% (total:560)
Precision when positive threshold at 0.9999 is :0.9516% (total:496)
Precision when positive threshold at 0.99995 is :0.9528% (total:466)
Precision when positive threshold at 0.99999 is :0.9631% (total:407)
Precision when 

In [33]:
#0reads_25sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=0
reads_reduce=50
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7679, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.7236, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6957, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.6815, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.6691, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.6513, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.6438, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.6288, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.6209, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.6236, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.6418   accuracy:60.5228%
epoch 11 loss:   tensor(2.6016, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.5987, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.5834, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.1093, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.1490, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.1281, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.8787   accuracy:81.0992%
epoch 111 loss:   tensor(0.1339, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.1218, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.1270, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.1575, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.1519, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.1067, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.0976, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.1141, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.2188, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.0889, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.0567, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.0167, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.1012, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.0389, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.0761, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.0701, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.0305, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.0675, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0528, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.8936   accuracy:81.7694%
epoch 221 loss:   tensor(0.0550, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.0503, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.0813, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0618, device='cuda:0', grad_fn=<DivBack

In [49]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=0
reads_reduce=50
#model.load_state_dict(torch.load('./model/m6A_NSWord_0reads_25sites_keep_model_280_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_0reads_25sites_keep_model_290_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],0reads_25sites')

Im total 1492 samples:
AUC:0.9100   accuracy:83.7131%
Precision when positive threshold at 0.5 is :0.8048% (total:784)
Precision when positive threshold at 0.6 is :0.8119% (total:776)
Precision when positive threshold at 0.8 is :0.8340% (total:747)
Precision when positive threshold at 0.7 is :0.8274% (total:759)
Precision when positive threshold at 0.9 is :0.8404% (total:733)
Precision when positive threshold at 0.95 is :0.8500% (total:720)
Precision when positive threshold at 0.98 is :0.8610% (total:705)
Precision when positive threshold at 0.99 is :0.8737% (total:689)
Precision when positive threshold at 0.995 is :0.8879% (total:669)
Precision when positive threshold at 0.999 is :0.9028% (total:638)
Precision when positive threshold at 0.9995 is :0.9117% (total:623)
Precision when positive threshold at 0.9999 is :0.9333% (total:555)
Precision when positive threshold at 0.99995 is :0.9377% (total:530)
Precision when positive threshold at 0.99999 is :0.9600% (total:450)
Precision when 

# Different seq len

In [61]:
#50reads_21sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=4
reads_reduce=0
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7322, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.6841, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6749, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.6384, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.5944, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.5493, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.5190, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.4897, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.4428, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.4020, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.7719   accuracy:70.6434%
epoch 11 loss:   tensor(2.3384, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.2926, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.2344, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.1352, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.1382, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.1139, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.9206   accuracy:85.3887%
epoch 111 loss:   tensor(0.1442, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.1262, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.1228, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.1144, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.1199, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.1398, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.1307, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.1044, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.1229, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.1372, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.0528, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.0434, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.0732, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.0720, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.0501, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.0660, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.0657, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.0532, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0713, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.9348   accuracy:85.0536%
epoch 221 loss:   tensor(0.0437, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.0502, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.0547, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0835, device='cuda:0', grad_fn=<DivBack

In [69]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=4
reads_reduce=0
#model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_21sites_keep_model_280_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_21sites_keep_model_240_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],50reads_21sites')

Im total 1492 samples:
AUC:0.9350   accuracy:85.9249%
Precision when positive threshold at 0.5 is :0.8214% (total:795)
Precision when positive threshold at 0.6 is :0.8247% (total:787)
Precision when positive threshold at 0.8 is :0.8382% (total:760)
Precision when positive threshold at 0.7 is :0.8305% (total:773)
Precision when positive threshold at 0.9 is :0.8466% (total:743)
Precision when positive threshold at 0.95 is :0.8583% (total:720)
Precision when positive threshold at 0.98 is :0.8686% (total:700)
Precision when positive threshold at 0.99 is :0.8791% (total:678)
Precision when positive threshold at 0.995 is :0.8947% (total:655)
Precision when positive threshold at 0.999 is :0.9056% (total:604)
Precision when positive threshold at 0.9995 is :0.9128% (total:585)
Precision when positive threshold at 0.9999 is :0.9345% (total:534)
Precision when positive threshold at 0.99995 is :0.9406% (total:505)
Precision when positive threshold at 0.99999 is :0.9400% (total:450)
Precision when 

In [54]:
#50reads_15sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=10
reads_reduce=0
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7308, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.6793, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6314, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.5988, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.5752, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.5475, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.5218, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.4918, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.4717, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.4363, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.7385   accuracy:68.2976%
epoch 11 loss:   tensor(2.3969, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.3280, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.2811, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.1848, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.1533, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.1938, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.9084   accuracy:82.9759%
epoch 111 loss:   tensor(0.2001, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.1536, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.1855, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.1605, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.1640, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.1868, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.1385, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.2106, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.1687, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.1505, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.0917, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.0652, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.0897, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.0793, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.0941, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.0682, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.1001, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.0622, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0674, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.9003   accuracy:82.4397%
epoch 221 loss:   tensor(0.0960, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.0937, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.0665, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0791, device='cuda:0', grad_fn=<DivBack

In [60]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=10
reads_reduce=0
#model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_15sites_keep_model_170_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_15sites_keep_model_290_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],50reads_15sites')

Im total 1492 samples:
AUC:0.9190   accuracy:84.8526%
Precision when positive threshold at 0.5 is :0.8000% (total:825)
Precision when positive threshold at 0.6 is :0.8034% (total:814)
Precision when positive threshold at 0.8 is :0.8137% (total:789)
Precision when positive threshold at 0.7 is :0.8090% (total:801)
Precision when positive threshold at 0.9 is :0.8235% (total:765)
Precision when positive threshold at 0.95 is :0.8398% (total:743)
Precision when positive threshold at 0.98 is :0.8559% (total:708)
Precision when positive threshold at 0.99 is :0.8624% (total:683)
Precision when positive threshold at 0.995 is :0.8680% (total:659)
Precision when positive threshold at 0.999 is :0.8983% (total:590)
Precision when positive threshold at 0.9995 is :0.8990% (total:574)
Precision when positive threshold at 0.9999 is :0.9028% (total:535)
Precision when positive threshold at 0.99995 is :0.9035% (total:518)
Precision when positive threshold at 0.99999 is :0.9188% (total:468)
Precision when 

In [56]:
#50reads_5sites
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
optimizer=optim.Adam(model.parameters(),lr=0.0001)
loss_func=nn.BCELoss().to(device)
epochs=300
seq_reduce=20
reads_reduce=0
train(model,m6A_Nano_train_loader,m6A_Nano_test_loader,device,optimizer,loss_func,epochs,seq_reduce,reads_reduce)

epoch 1 loss:   tensor(2.7262, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2 loss:   tensor(2.6844, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3 loss:   tensor(2.6658, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4 loss:   tensor(2.6512, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5 loss:   tensor(2.6253, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6 loss:   tensor(2.5934, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7 loss:   tensor(2.5924, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8 loss:   tensor(2.5663, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9 loss:   tensor(2.5472, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10 loss:   tensor(2.5259, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 10 :
AUC:0.7219   accuracy:66.6890%
epoch 11 loss:   tensor(2.5111, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12 loss:   tensor(2.4974, device='cuda:0', grad_fn=<DivBackward0>)
epoch 13 loss:   tensor(2.4785, device='cuda:0', grad_fn=<DivBackward0>)
epoch 14 loss: 

epoch 108 loss:   tensor(0.4114, device='cuda:0', grad_fn=<DivBackward0>)
epoch 109 loss:   tensor(0.3767, device='cuda:0', grad_fn=<DivBackward0>)
epoch 110 loss:   tensor(0.3752, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 110 :
AUC:0.8456   accuracy:76.4745%
epoch 111 loss:   tensor(0.4052, device='cuda:0', grad_fn=<DivBackward0>)
epoch 112 loss:   tensor(0.3610, device='cuda:0', grad_fn=<DivBackward0>)
epoch 113 loss:   tensor(0.3596, device='cuda:0', grad_fn=<DivBackward0>)
epoch 114 loss:   tensor(0.3616, device='cuda:0', grad_fn=<DivBackward0>)
epoch 115 loss:   tensor(0.3835, device='cuda:0', grad_fn=<DivBackward0>)
epoch 116 loss:   tensor(0.3508, device='cuda:0', grad_fn=<DivBackward0>)
epoch 117 loss:   tensor(0.3203, device='cuda:0', grad_fn=<DivBackward0>)
epoch 118 loss:   tensor(0.3678, device='cuda:0', grad_fn=<DivBackward0>)
epoch 119 loss:   tensor(0.3228, device='cuda:0', grad_fn=<DivBackward0>)
epoch 120 loss:   tensor(0.2981, device='cuda:0', grad_fn=<DivBack

epoch 212 loss:   tensor(0.1108, device='cuda:0', grad_fn=<DivBackward0>)
epoch 213 loss:   tensor(0.1398, device='cuda:0', grad_fn=<DivBackward0>)
epoch 214 loss:   tensor(0.1194, device='cuda:0', grad_fn=<DivBackward0>)
epoch 215 loss:   tensor(0.1084, device='cuda:0', grad_fn=<DivBackward0>)
epoch 216 loss:   tensor(0.1437, device='cuda:0', grad_fn=<DivBackward0>)
epoch 217 loss:   tensor(0.1219, device='cuda:0', grad_fn=<DivBackward0>)
epoch 218 loss:   tensor(0.1160, device='cuda:0', grad_fn=<DivBackward0>)
epoch 219 loss:   tensor(0.1527, device='cuda:0', grad_fn=<DivBackward0>)
epoch 220 loss:   tensor(0.0985, device='cuda:0', grad_fn=<DivBackward0>)
At epoch 220 :
AUC:0.8573   accuracy:78.0161%
epoch 221 loss:   tensor(0.1201, device='cuda:0', grad_fn=<DivBackward0>)
epoch 222 loss:   tensor(0.1505, device='cuda:0', grad_fn=<DivBackward0>)
epoch 223 loss:   tensor(0.1238, device='cuda:0', grad_fn=<DivBackward0>)
epoch 224 loss:   tensor(0.0761, device='cuda:0', grad_fn=<DivBack

In [59]:
device=torch.device('cuda:0')
model=Nano(c_s=4,c_x=3,c_emb=96,c_f=16,c_hidden_att=64,c_o=1,no_heads=8,blocks_lis=[2,2,2,0,0,0],
            row_dropout=0.1,col_dropout=0.1,transition_n=2,inf=1e9,eps=1e-8,
            clear_cache_between_blocks=False).to(device)
seq_reduce=20
reads_reduce=0
#model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_5sites_keep_model_210_222000.pkl'))
model.load_state_dict(torch.load('./model/m6A_NSWord_50reads_5sites_keep_model_50_222000.pkl'))
detailed_test(model,m6A_Nano_test_loader,device,seq_reduce,reads_reduce,'Blocks=[222000],50reads_5sites')

Im total 1492 samples:
AUC:0.8616   accuracy:77.8820%
Precision when positive threshold at 0.5 is :0.7292% (total:853)
Precision when positive threshold at 0.6 is :0.7628% (total:759)
Precision when positive threshold at 0.8 is :0.8210% (total:553)
Precision when positive threshold at 0.7 is :0.7964% (total:658)
Precision when positive threshold at 0.9 is :0.8659% (total:358)
Precision when positive threshold at 0.95 is :0.9186% (total:221)
Precision when positive threshold at 0.98 is :0.9583% (total:96)
Precision when positive threshold at 0.99 is :0.9630% (total:54)
Precision when positive threshold at 0.995 is :0.9677% (total:31)
Precision when positive threshold at 0.999 is :1.0000% (total:3)
