## TPR by normalized rank curve

In [None]:
import pickle
import numpy as np
# Load the pickled rank statistics of every experiment for the selected taxon.
# Each file is stored under a short lower-case key in `rank_stats`; the
# upper-case tag is the experiment name embedded in the file name.
# NOTE: pickle executes arbitrary code on load -- only open files you produced.
taxon = 'virus'
rank_stats = {}
for key, tag in (
    ('go', 'GO'),
    ('hp', 'HP'),
    ('mp', 'MP'),
    ('hpimpigo', 'HPiMPiGO'),
    ('hpumpugo', 'HPuMPuGO'),
):
    with open('stats/rank_stats_' + tag + '_' + taxon + '.pckl', 'rb') as f:
        rank_stats[key] = pickle.load(f)

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so prefer the new name and only fall back to
# the legacy one on older installations.
_whitegrid = 'seaborn-v0_8-whitegrid'
plt.style.use(_whitegrid if _whitegrid in plt.style.available else 'seaborn-whitegrid')

# NumPy 2.0 deprecated `np.trapz` in favour of `np.trapezoid`; use whichever
# this installation provides so the cell runs on both old and new NumPy.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz


def _rank_tpr_curve(ranks_by_patho, max_rank):
    """Build the cumulative TPR-vs-rank curve for a single epoch.

    Args:
        ranks_by_patho: mapping whose values are iterables of ranks (hashable
            and sortable). The keys are not used.
        max_rank: value appended as the last x coordinate (with TPR 1) and
            used to normalise the area under the curve.

    Returns:
        ``(xs, ys, auc)`` where ``xs`` are the sorted distinct ranks followed
        by ``max_rank``, ``ys`` the cumulative fraction of ranks at or below
        each x, and ``auc`` the trapezoidal area divided by ``max_rank``.
        ``None`` when the epoch holds no ranks at all (the fraction would be
        a division by zero).
    """
    from collections import Counter

    counts = Counter(rank for ranks in ranks_by_patho.values() for rank in ranks)
    n_ranks = sum(counts.values())
    if not n_ranks:
        return None

    step = 1 / n_ranks
    xs = sorted(counts)
    ys = []
    tpr = 0
    for x in xs:
        tpr += counts[x] * step
        ys.append(tpr)
    # Close the curve at (max_rank, 1).
    xs.append(max_rank)
    ys.append(1)
    return xs, ys, _trapezoid(ys, xs) / max_rank


# Per experiment: the curve (normalised x, y) of the epoch with the highest
# AUC, and that AUC. Filled in by the loop below and read by the plot cell.
x_dict = {'go': [], 'hp': [], 'mp': [], 'hpimpigo': [], 'hpumpugo': []}
y_dict = {'go': [], 'hp': [], 'mp': [], 'hpimpigo': [], 'hpumpugo': []}

max_auc = {'go': 0, 'hp': 0, 'mp': 0, 'hpimpigo': 0, 'hpumpugo': 0}

# Number of epochs inspected per experiment; rank_stats[experiment][1] must be
# indexable by 0..epochs-1.
epochs = 200

for experiment in max_auc:
    print(experiment)
    # Entry 0 is the x-axis upper bound / normaliser (presumably the number of
    # ranked candidates -- confirm against the code that wrote the pickles);
    # entry 1 holds, per epoch index, a mapping to lists of ranks.
    max_rank = rank_stats[experiment][0]
    ranks_per_epoch = rank_stats[experiment][1]
    for i in range(epochs):
        curve = _rank_tpr_curve(ranks_per_epoch[i], max_rank)
        if curve is None:
            # No ranks recorded for this epoch: nothing to evaluate.
            continue
        auc_x, auc_y, auc = curve
        if auc > max_auc[experiment]:
            max_auc[experiment] = auc
            x_dict[experiment] = np.array(auc_x) / max_rank
            y_dict[experiment] = auc_y
            print('Rank based auc is: %f' % (auc))

In [None]:
# Plot the best TPR-vs-normalised-rank curve of every experiment together with
# the diagonal of a random ranking.
fig = plt.figure(figsize=(4, 4), dpi=200)

# (key into x_dict / y_dict / max_auc, legend name). The mathtext names are
# raw strings: '\c' is an invalid escape sequence in a normal string literal
# and triggers a SyntaxWarning on recent Python versions.
curves = (
    ('go', 'GO'),
    ('hp', 'HP'),
    ('mp', 'MP'),
    ('hpimpigo', r'HP$\cap$MP$\cap$GO'),
    ('hpumpugo', r'HP$\cup$MP$\cup$GO'),
)
for key, name in curves:
    plt.plot(x_dict[key], y_dict[key],
             label='%s (AUC=%.3f)' % (name, max_auc[key]))
plt.plot([0, 1], [0, 1], '--', label='Random (AUC= 0.50)')
plt.legend()
plt.xlabel('Normalized rank')
plt.ylabel('TPR')
# Both axes span [0, 1]; equal scaling keeps the diagonal at 45 degrees.
plt.axis('scaled')
plt.show()

## AUC-Epoch curve

In [None]:
import pickle
import numpy as np
# (Re)load the pickled rank statistics so this section can be run on its own.
# Keys of `rank_stats` are the short experiment names; the values of
# `experiment_tags` are the experiment names as they appear in the file names.
# NOTE: pickle executes arbitrary code on load -- only open files you produced.
taxon = 'virus'
experiment_tags = {
    'go': 'GO',
    'hp': 'HP',
    'mp': 'MP',
    'hpimpigo': 'HPiMPiGO',
    'hpumpugo': 'HPuMPuGO',
}
rank_stats = {}
for key, tag in experiment_tags.items():
    with open('stats/rank_stats_%s_%s.pckl' % (tag, taxon), 'rb') as f:
        rank_stats[key] = pickle.load(f)

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so prefer the new name and only fall back to
# the legacy one on older installations.
_whitegrid = 'seaborn-v0_8-whitegrid'
plt.style.use(_whitegrid if _whitegrid in plt.style.available else 'seaborn-whitegrid')

# NumPy 2.0 deprecated `np.trapz` in favour of `np.trapezoid`; use whichever
# this installation provides so the cell runs on both old and new NumPy.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz


def _rank_tpr_curve(ranks_by_patho, max_rank):
    """Build the cumulative TPR-vs-rank curve for a single epoch.

    Args:
        ranks_by_patho: mapping whose values are iterables of ranks (hashable
            and sortable). The keys are not used.
        max_rank: value appended as the last x coordinate (with TPR 1) and
            used to normalise the area under the curve.

    Returns:
        ``(xs, ys, auc)`` where ``xs`` are the sorted distinct ranks followed
        by ``max_rank``, ``ys`` the cumulative fraction of ranks at or below
        each x, and ``auc`` the trapezoidal area divided by ``max_rank``.
        ``None`` when the epoch holds no ranks at all (the fraction would be
        a division by zero).
    """
    from collections import Counter

    counts = Counter(rank for ranks in ranks_by_patho.values() for rank in ranks)
    n_ranks = sum(counts.values())
    if not n_ranks:
        return None

    step = 1 / n_ranks
    xs = sorted(counts)
    ys = []
    tpr = 0
    for x in xs:
        tpr += counts[x] * step
        ys.append(tpr)
    # Close the curve at (max_rank, 1).
    xs.append(max_rank)
    ys.append(1)
    return xs, ys, _trapezoid(ys, xs) / max_rank


# Per experiment: the AUC of every evaluated epoch and the matching epoch
# indices (kept as two parallel lists for plotting).
auc_dict = {'go': [], 'hp': [], 'mp': [], 'hpimpigo': [], 'hpumpugo': []}
epoch_dict = {'go': [], 'hp': [], 'mp': [], 'hpimpigo': [], 'hpumpugo': []}

# Not updated in this cell; its keys define which experiments are processed.
max_auc = {'go': 0, 'hp': 0, 'mp': 0, 'hpimpigo': 0, 'hpumpugo': 0}

# Number of epochs inspected per experiment; rank_stats[experiment][1] must be
# indexable by 0..epochs-1.
epochs = 200

for experiment in max_auc:
    print(experiment)
    # Entry 0 is the x-axis upper bound / normaliser (presumably the number of
    # ranked candidates -- confirm against the code that wrote the pickles);
    # entry 1 holds, per epoch index, a mapping to lists of ranks.
    max_rank = rank_stats[experiment][0]
    ranks_per_epoch = rank_stats[experiment][1]
    for i in range(epochs):
        curve = _rank_tpr_curve(ranks_per_epoch[i], max_rank)
        if curve is None:
            # No ranks recorded for this epoch: leave it out of the series
            # instead of failing on a division by zero.
            continue
        auc = curve[2]
        auc_dict[experiment].append(auc)
        epoch_dict[experiment].append(i)

In [None]:
# Plot the AUC of every experiment as a function of the epoch index.
fig = plt.figure(figsize=(4, 4), dpi=200)

# (key into epoch_dict / auc_dict, legend name). The mathtext names are raw
# strings: '\c' is an invalid escape sequence in a normal string literal and
# triggers a SyntaxWarning on recent Python versions.
series = (
    ('go', 'GO'),
    ('hp', 'HP'),
    ('mp', 'MP'),
    ('hpimpigo', r'HP$\cap$MP$\cap$GO'),
    ('hpumpugo', r'HP$\cup$MP$\cup$GO'),
)
for key, name in series:
    plt.plot(epoch_dict[key], auc_dict[key], label=name)
plt.legend(loc='lower right')
plt.xlabel('Epochs')
plt.ylabel('AUC')
#plt.axis('scaled')
plt.show()