In [1]:
# Install every package this notebook depends on that is not installed yet.
for p in ("Knet", "Plots", "NBInclude")
    # A package is added only when Pkg.installed reports `nothing` for it.
    # `===` is the identity test for `nothing`; `==` may be overloaded by other types.
    Pkg.installed(p) === nothing && Pkg.add(p)
end

In [2]:
using Knet, Plots, NBInclude;
# Run the helper notebook so that its definitions become available in this notebook.
# According to the original note it loads: trnper, devper, tstper, featuresDict,
# concatAB, proteinA, proteinB, ygold, winit, minibatchi, predict, loss, zeroone,
# report, lossgradient.
# NOTE(review): the cells below also use NOINPUTS, NOOUTPUTS, dividedataset, params,
# trainSep!, predictSep, modelevaluation and accuracyi, which are not in that list --
# presumably they are defined in the same helper notebook; confirm.
nbinclude("deepppiutils.ipynb");

## Model initialization

In [3]:
# Training schedule.
NOEPOCH = 30;     # passed to trainSep! as `epochs`
BATCHSIZE = 64;   # passed to dividedataset as `batchsize`

# Layer widths of the DeepPPI-Sep model. HIDDENSSEP is handed to winit once for each
# of the two separate branches (wa, wb); HIDDENSMER is handed to winit for the merged
# part (wMerged). NOINPUTS / NOOUTPUTS are defined outside this notebook cell.
HIDDENSSEP = Any[NOINPUTS, 512, 256, 128];
HIDDENSMER = Any[256, 128, NOOUTPUTS]

# Dropout setting, passed to trainSep! as `pdrop`.
PDROP = (0, 0.2);

## Model Training and evaluation
For model evaluation, a hold-out validation was repeated five times, and the average performance over the five runs was calculated.

In [4]:
# Repeated hold-out evaluation of the DeepPPI-Sep model. Each of the five runs
# initialises fresh weights, draws a new train/test split, trains, prints the final
# loss/error, evaluates on the test split and writes the weights to a CSV file.
#
# Per-run scores are collected in the global vectors below; the score-report cell reads
# them afterwards. They are created as Float64[] rather than untyped [] so the scores
# are stored with a concrete element type.
# NOTE(review): `precision` is also the name of a Base function; the name is kept
# because the report cell indexes it -- rename in both cells together if desired.
accuracy = Float64[]
recall = Float64[]
specifity = Float64[]
precision = Float64[]
mcc = Float64[]
f1 = Float64[]
npv = Float64[]
# Pre-declared at global scope, so the assignments inside the loop presumably update
# these globals under the notebook's scoping rules. f1i and npvi are not pre-declared.
accuracyt = recalli = specifityi = precisioni = mcci = 0.0
for i in 1:5
    #setseed(i);
    # One weight set per separate branch (same layer sizes), plus the merged part;
    # all three are concatenated into the single weight collection `w`.
    wa = winit(HIDDENSSEP...);
    wb = winit(HIDDENSSEP...);
    wMerged = winit(HIDDENSMER...);
    w = vcat(wa, wb, wMerged);

    # With dev=false only a training and a test split are returned.
    #dtrn, ddev, dtst = dividedataset(concatAB, ygold, trnper, devper, tstper; batchsize= BATCHSIZE);
    dtrn, dtst = dividedataset(concatAB, ygold, trnper, devper, tstper; batchsize= BATCHSIZE, dev=false);

    optims = params(w; optim="Momentum", lr=0.01, gamma=0.9);
    # NOTE(review): the test split is passed in the position where the commented-out
    # call passed ddev, so the :dev figures printed during training presumably refer
    # to the test split -- confirm against trainSep!.
    #@time trnloss, trnerr, tstloss, tsterr=trainSep!(w, optims, dtrn, predictSep, ddev; pdrop=PDROP, epochs=NOEPOCH)
    @time trainSep!(w, optims, dtrn, predictSep, dtst; pdrop=PDROP, epochs=NOEPOCH)

    # The labels say "min.", but the values are computed here from `w` as it is after
    # trainSep! has returned.
    println("Dataset", i)
    println("Training: min. loss =",loss(w,dtrn,predictSep),", min. error =",zeroone(w,dtrn,predictSep))
    println("Test: min. loss =",loss(w,dtst,predictSep),", min. error =",zeroone(w,dtst,predictSep))

    # Evaluate on the test split and record the seven scores of this run.
    accuracyt,recalli,specifityi,precisioni,mcci,f1i,npvi = modelevaluation(w, dtst, predictSep; p=true);
    push!(accuracy, accuracyt)
    push!(recall, recalli)
    push!(specifity, specifityi)
    push!(precision, precisioni)
    push!(mcc, mcci)
    push!(f1, f1i)
    push!(npv, npvi)

    # Every element of `w` is converted with Array before the weights are written to
    # a per-run file.
    writedlm("DeepPPI_SepModel"*string(i)*".csv", map(Array, w))
end

(:epoch, 10, :trn, 0.5876277139208174, :dev, 0.5860933048433049)
(:epoch, 20, :trn, 0.9664737654320987, :dev, 0.9352475071225071)
(:epoch, 30, :trn, 0.9736496913580247, :dev, 0.9369569088319089)
 46.285533 seconds (33.16 M allocations: 1.285 GiB, 12.97% gc time)
Dataset1
Training: min. loss =0.07222839, min. error =0.026350308641975295
Test: min. loss =0.2158853, min. error =0.06304309116809115
TP: 3951 , TN: 4133 , FP: 380 , FN: 164
Model evaluation:
Accuracy : 0.9369494667435153
Precision : 0.9122604477228675
NPV : 0.9618338373372506
Sensitivity / Recall : 0.9601458077861127
Specifity : 0.9157988032537561
MCC : 0.8750189593951528
F1 : 0.9355908121080285
(:epoch, 10, :trn, 0.7357386121753938, :dev, 0.735176282051282)
(:epoch, 20, :trn, 0.9678932524478502, :dev, 0.9276976495726496)
(:epoch, 30, :trn, 0.976929012345679, :dev, 0.9343839031339031)
 68.475417 seconds (32.30 M allocations: 1.235 GiB, 39.77% gc time)
Dataset2
Training: min. loss =0.06568445, min. error =0.02307098765432103
T

In [5]:
#summary(accuracy)
# Write the per-run scores collected by the training cell, followed by their averages,
# to a tab-separated text report.
# Rows are driven by the number of collected accuracy values instead of a hard-coded
# 1:5, so the report stays in step with the training loop if its run count changes.
# F1 is collected by the training cell but is not part of this report.
open("DeepPPI-Sep_Scores.txt", "w") do f
    rule = "__________________________________________________________________________________________________________________________________\n"
    # Column order of the report; it must match the header line below.
    columns = (accuracy, precision, npv, recall, specifity, mcc)

    write(f, "Dataset \tAccuracy\t\t\tPrecision\t\t\tnpv      \t\t\tRecall   \t\t\tSpecifity\t\t\tMCC\n")
    write(f, rule)
    for i in eachindex(accuracy)
        row = map(col -> col[i], columns)
        write(f, string("dataset", i, "\t", join(row, "\t"), "\n"))
    end
    write(f, rule)
    write(f, string("Average", "\t\t", join(map(mean, columns), "\t"), "\n"))
end;
#println("Accuracy", "   Precision", "   npv"," recall", "    specifity", "     mcc", "       f1")
#(hcat(accuracy, precision, npv, recall, specifity, mcc, f1))

In [6]:
# Count the trainable parameters of the model from the (rows, columns) shape of every
# weight and bias array.
# NOTE(review): the shapes are hard-coded; they presumably mirror the arrays in `w`
# (two branches 1164 -> 512 -> 256 -> 128 and a merged part 256 -> 128 -> 2) and must
# be updated by hand if the layer sizes change.
w_sizes = Tuple{Int64,Int64}[(512, 1164) (512, 1) (256, 512) (256, 1) (128, 256) (128, 1) (512, 1164) (512, 1) (256, 512) (256, 1) (128, 256) (128, 1) (128, 256) (128, 1) (2, 128) (2, 1)]
# Each array contributes rows * columns parameters. Summing with `sum(prod, ...)`
# avoids updating a global inside a top-level loop, which only works under the
# notebook/REPL scoping rules.
totalNoOfParams = sum(prod, w_sizes);
println("Total Number of trainable parameters: ", totalNoOfParams)


Total Number of trainable parameters: 1554562


## The following code was used during training to compare different hyperparameters

In [None]:
# Error curves from a hyper-parameter run, one series per split.
# NOTE(review): trnerr / tsterr are only assigned by the commented-out trainSep! call
# in the training cell; that call has to be re-enabled before this cell can run.
plot(hcat(trnerr, tsterr);
     ylim=(0.00, 0.55),
     xlabel="Epochs",
     ylabel="Error",
     labels=[:trnDeepPPI_Sep :tstDeepPPI_Sep])

In [None]:
# Loss curves from a hyper-parameter run, one series per split.
# NOTE(review): trnloss / tstloss are only assigned by the commented-out trainSep! call
# in the training cell; that call has to be re-enabled before this cell can run.
plot(hcat(trnloss, tstloss);
     ylim=(0.0, 0.8),
     xlabel="Epochs",
     ylabel="Loss",
     labels=[:trnDeepPPI_Sep :tstDeepPPI_Sep])

In [None]:
# Training split: accuracyi is summed over every (x, y) element of dtrn (presumably
# one minibatch each) and divided by the number of elements; the loss and the
# modelevaluation report for the same split follow.
trnacc = 0;
for batch in dtrn
    inputs, targets = batch
    trnacc += accuracyi(predictSep(w, inputs), targets)
end
println("Training Dataset")
println("Accuracy: ", trnacc / length(dtrn));
println("Loss: ", loss(w, dtrn, predictSep));
modelevaluation(w, dtrn, predictSep; p=true);

In [None]:
# Dev split: accuracyi is summed over every (x, y) element of ddev (presumably one
# minibatch each) and divided by the number of elements; the loss and the
# modelevaluation report for the same split follow.
# NOTE(review): ddev is only assigned by the commented-out dividedataset call in the
# training cell; that call has to be re-enabled before this cell can run.
devacc = 0;
for batch in ddev
    inputs, targets = batch
    devacc += accuracyi(predictSep(w, inputs), targets)
end
println("Dev Dataset")
println("Accuracy: ", devacc / length(ddev))
println("Loss: ", loss(w, ddev, predictSep))
modelevaluation(w, ddev, predictSep; p=true);

In [None]:
# Test split: accuracyi is summed over every (x, y) element of dtst (presumably one
# minibatch each) and divided by the number of elements; the loss and the
# modelevaluation report for the same split follow.
tstacc = 0;
for batch in dtst
    inputs, targets = batch
    tstacc += accuracyi(predictSep(w, inputs), targets)
end
println("Test Dataset")
println("Accuracy: ", tstacc / length(dtst))
println("Loss: ", loss(w, dtst, predictSep))
modelevaluation(w, dtst, predictSep; p=true);