In [5]:
import pandas as pd
import keras
from keras.models import load_model
import numpy as np
from tqdm import tqdm

In [6]:
df = pd.read_csv('./data/csv/test_acc10_300.csv')

In [7]:
# group by ligand
# for each group, compute top10 accuracy manually
def test_model(path):
    model = load_model(model_path)

    matches = 0
    total = float(len(df.lig_id.unique()))
    debug = False

    for lig_id, grp in tqdm(df.groupby('lig_id')):
        grp.reset_index(inplace=True)

        match_id = grp.index[grp['pro_id']==lig_id]

        # load all features
        probabilities = []
        n_channels = 2
        dims = (24,24,24)
        batch_size = 150

        for i in range(0, len(grp.index), batch_size):
            X = np.empty((batch_size, *dims, n_channels))
            sub_grp = grp[i*batch_size : i*batch_size+batch_size]
            for row in sub_grp.itertuples():
                X[row[0],] = np.load(row.dest)

            partial_probs = model.predict_on_batch(X)
            partial_probs = partial_probs.flatten()
            probabilities.extend(partial_probs)

        largest_first = reversed(np.argsort(probabilities).tolist())
        top10 = list(largest_first)[:10]

        if debug:
            print('ligid: ', lig_id)
            print('match_id: ', match_id)
            print('top_10: ', top10)
            print()

        if match_id in top10:
            matches += 1
            
    print(matches/total)    


In [4]:
model_path = './models/try_epochs_16.h5'
test_model(model_path)

0.4866666666666667


In [8]:
model_path = './models/try_epochs_5.h5'
test_model(model_path)

100%|██████████| 300/300 [05:23<00:00,  1.07s/it]

0.49





In [9]:
model_path = './models/try_epochs_1.h5'
test_model(model_path)

100%|██████████| 300/300 [05:15<00:00,  1.05s/it]

0.47





In [10]:
model_path = './models/basic_c_and_n_regression_hydrophobic_only.h5'
test_model(model_path)

100%|██████████| 300/300 [05:15<00:00,  1.07s/it]

0.4766666666666667



