In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import cohen_kappa_score
import copy
from sklearn.metrics import confusion_matrix
from features_general import aac_gen,dpc_gen,bin_aac
from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import ExtraTreesClassifier 
import pickle
import warnings
warnings.filterwarnings('ignore')

In [41]:
def load_models_run_test(option,root,X,Y):
    print("TEST")
    path=root
   
    if option=='aac':
        path+='/aac_models/'
    elif option=='dpc':
        path+='/dpc_models/'
    elif option=='bin_n5':
        path+='/bin_n5_models/'
    elif option=='bin_n10':
        path+='/bin_n10_models/'
    elif option=='bin_n15':
        path+='/bin_n15_models/'
    elif option=='bin_c5':
        path+='/bin_c5_models/'
    elif option=='bin_c10':
        path+='/bin_c10_models/'
    elif option=='bin_c15':
        path+='/bin_c15_models/'
    elif option=='bin_nc5':
        path+='/bin_nc5_models/'
    elif option=='bin_nc10':
        path+='/bin_nc10_models/'
    elif option=='bin_nc15':
        path+='/bin_nc15_models/'
    #Load MLP
    clf = pickle.load(open(path + 'mlp_' + option + '.pickle','rb'))
    Perform_testing(clf,'mlp',X,Y)
    #Load SVM
    clf = pickle.load(open(path + 'svm_' + option + '.pickle','rb'))
    Perform_testing(clf,'svm',X,Y)
    #Load KNN
    clf = pickle.load(open(path + 'knn_' + option + '.pickle','rb'))
    Perform_testing(clf,'knn',X,Y)
    #Load ridge
    clf = pickle.load(open(path + 'ridge_' + option + '.pickle','rb'))
    Perform_testing(clf,'ridge',X,Y)
    #Load rf
    clf = pickle.load(open(path + 'rf_' + option + '.pickle','rb'))
    Perform_testing(clf,'rf',X,Y)
    #Load extra trees
    clf = pickle.load(open(path + 'extra_trees_' + option + '.pickle','rb'))
    Perform_testing(clf,'extra_trees',X,Y)
        
    

In [42]:
def getVector(line,option1,option2,x=None,y=None):
    if option1=='aac':
        return aac_gen(line,option2,x,y)
    elif option1=='dpc':
        return dpc_gen(line,option2,x,y)
    elif option1=='bin':
        return bin_aac(line,option2,x,y)

In [43]:
def getX(X,indexes):
    ans=[]
    for i in indexes:
        ans+=[X[i]]
    return ans

In [44]:
def getXYforfeature(name,option1,option2,append,x=None,y=None,):
    lennow=0
    if x!=None and y!=None: 
        lennow=max(x,y)
    elif x!=None:
        lennow=x
    elif y!=None:
        lennow=y
    
    positive_file=append+'/validation/pos_valid'
    negative_file=append+'/validation/neg_valid'
    X=[]
    Y=[]
    cnt=0
    f1=open(positive_file,'r')
    for line in f1:
        line=line.strip()
        line=line.upper()
        if line.count('U')==0 and len(line)>=lennow:
            X+=[getVector(line,option1,option2,x,y)]
            Y+=[+1]
    f1.close()
    
    f2=open(negative_file,'r')
    for line in f2:
        line=line.strip()
        line=line.upper()
        if line.count('U')==0 and len(line)>=lennow:
            X+=[getVector(line,option1,option2,x,y)]
            Y+=[-1]
    f2.close()  
    return X,Y

In [49]:
def Perform_testing(clf,name,X,Y):
    Y_test=Y
    Y_pred=clf.predict(X)
    
    accuracies=[]
    specificities=[]
    senstivities=[]
    roc_auc_scores=[]
    mcc_scores=[]
    cohen_scores=[]
    
    Y_scores=[]
    if hasattr(clf,'decision_function'):
        Y_scores=clf.decision_function(X)
    else:
        Y_scores=clf.predict_proba(X)[:,1]
            
    acc_score=accuracy_score(Y_test,Y_pred)
    tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
    specificity = tn / (tn+fp)
    senstivity = tp /(fn+tp)
    rocauc_score=roc_auc_score(Y_test, Y_scores)
    mcc_score=matthews_corrcoef(Y_test,Y_pred)  
    cohen_score = cohen_kappa_score(Y_test, Y_pred)
    accuracies=np.asarray(accuracies)
    specificities=np.asarray(specificities)
    senstivities=np.asarray(senstivities)
    roc_auc_scores=np.asarray(roc_auc_scores)
    mcc_scores=np.asarray(mcc_scores)
    cohen_scores=np.asarray(cohen_scores)
    print(name)
    print("Accuracy: %0.2f (+/- %0.2f)" % (accuracies.mean(), accuracies.std() * 2))
    print("Specificity: %0.2f (+/- %0.2f)" % (specificities.mean(), specificities.std() * 2))
    print("Senstivity: %0.2f (+/- %0.2f)" % (senstivities.mean(), senstivities.std() * 2))
    print("roc-auc scores: %0.2f (+/- %0.2f)" % (roc_auc_scores.mean(), roc_auc_scores.std() * 2))
    print("mcc scores: %0.2f (+/- %0.2f)" % (mcc_scores.mean(), mcc_scores.std() * 2))
    print("cohen scores: %0.2f (+/- %0.2f)" % (cohen_scores.mean(), cohen_scores.std() * 2))

In [50]:
def Perform_KFold(clf,name,X,Y):
    kf = KFold(n_splits=5,shuffle=True,random_state=42)
    accuracies=[]
    specificities=[]
    senstivities=[]
    roc_auc_scores=[]
    mcc_scores=[]
    cohen_scores=[]
    cnt=0
    for train_index, test_index in kf.split(X):
        X_train, X_test = getX(X,train_index),getX(X,test_index)
        Y_train, Y_test = getX(Y,train_index),getX(Y,test_index)
        clf.fit(X_train,Y_train)
        Y_pred=clf.predict(X_test)
        #Y_scores=clf.predict_proba(X_test)[:,1]
        Y_scores=[]
        if hasattr(clf, "decision_function"):
            Y_scores=clf.decision_function(X_test)
        else:
            Y_scores=clf.predict_proba(X_test)[:,1]
            
        acc_score=accuracy_score(Y_test,Y_pred)
        tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
        specificity = tn / (tn+fp)
        senstivity = tp /(fn+tp)
        rocauc_score=roc_auc_score(Y_test, Y_scores)
        mcc_score=matthews_corrcoef(Y_test,Y_pred)  
        cohen_score = cohen_kappa_score(Y_test, Y_pred)
        #Matrices
        accuracies+=[acc_score]
        specificities+=[specificity]
        senstivities+=[senstivity]
        roc_auc_scores+=[rocauc_score]
        mcc_scores+=[mcc_score]
        cohen_scores+=[cohen_score]
    accuracies=np.asarray(accuracies)
    specificities=np.asarray(specificities)
    senstivities=np.asarray(senstivities)
    roc_auc_scores=np.asarray(roc_auc_scores)
    mcc_scores=np.asarray(mcc_scores)
    cohen_scores=np.asarray(cohen_scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (accuracies.mean(), accuracies.std() * 2))
    print("Specificity: %0.2f (+/- %0.2f)" % (specificities.mean(), specificities.std() * 2))
    print("Senstivity: %0.2f (+/- %0.2f)" % (senstivities.mean(), senstivities.std() * 2))
    print("roc-auc scores: %0.2f (+/- %0.2f)" % (roc_auc_scores.mean(), roc_auc_scores.std() * 2))
    print("mcc scores: %0.2f (+/- %0.2f)" % (mcc_scores.mean(), mcc_scores.std() * 2))
    print("cohen scores: %0.2f (+/- %0.2f)" % (cohen_scores.mean(), cohen_scores.std() * 2))
    

In [51]:
root1='./ACPs and non-ACPS'
root2='./ACPs and random peptides'



In [52]:
#AAC
X_aac,Y_aac=getXYforfeature('aac','aac','Normal',root1)
load_models_run_test('aac',root1,X_aac,Y_aac)

TEST
mlp
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- nan)
svm
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- nan)
knn
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- nan)
ridge
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- nan)
rf
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- nan)
extra_trees
Accuracy: nan (+/- nan)
Specificity: nan (+/- nan)
Senstivity: nan (+/- nan)
roc-auc scores: nan (+/- nan)
mcc scores: nan (+/- nan)
cohen scores: nan (+/- n