In [None]:
from funcx.sdk.client import FuncXClient

# Client handle shared by the later cells, which use it to register the
# function, submit a run and fetch the result.
fxc = FuncXClient()

In [None]:
def process_one_target(files, target, N):
    """Find the N entries most similar to ``target`` across fingerprint files.

    Each file is read as a sequence of 3-field rows ``(smiles, <ignored>, fp)``
    where ``fp`` is a base64 string that ``DataStructs.ExplicitBitVect`` can
    rebuild a fingerprint from. Every row that decodes is scored against the
    target's Morgan fingerprint (radius 2, 2048 bits) with Tanimoto
    similarity; rows that fail to decode or score are skipped.

    Args:
        files: Iterable of file paths. A path ending in ``'pkl'`` is
            unpickled; anything else is parsed as CSV.
        target: SMILES string of the query molecule.
        N: Number of best matches to keep.

    Returns:
        ``None`` if the target cannot be turned into a fingerprint. Otherwise
        a list of ``N`` ``(smiles, score)`` tuples in ascending score order
        (best match last); slots that no real match filled hold the
        placeholder ``('', 0.0)``.
    """
    # Imports are kept local -- presumably so the function stays
    # self-contained when it is registered with funcX and executed on an
    # endpoint. Do not hoist them to module level without confirming.
    import base64
    import csv
    import pickle
    from operator import itemgetter

    from rdkit import Chem
    from rdkit import DataStructs
    from rdkit.Chem import AllChem

    try:
        bit_target = AllChem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(target), 2, nBits=2048)
    except Exception:
        # The target could not be parsed/fingerprinted: nothing to score.
        return None

    # Always N entries long, ascending by score, padded with placeholders.
    best_so_far = [('', 0.0) for _ in range(N)]

    for file in files:
        if file.endswith('pkl'):
            # NOTE(security): unpickling runs arbitrary code embedded in the
            # file -- only pass pickle files from a trusted source.
            with open(file, 'rb') as handle:
                rows = pickle.load(handle)
        else:
            # newline='' is what the csv module asks for on text files.
            with open(file, 'r', newline='') as handle:
                rows = list(csv.reader(handle))

        # Decode and score in a single pass; some fingerprints do not
        # convert, and those rows are simply left out.
        scores = []
        for row in rows:
            if len(row) == 0:
                # Blank CSV line: nothing to unpack.
                continue
            smile, _, fp = row
            try:
                fingerprint = DataStructs.ExplicitBitVect(base64.b64decode(fp))
                score = DataStructs.TanimotoSimilarity(fingerprint, bit_target)
            except Exception:
                continue
            scores.append((smile, score))

        scores.sort(key=itemgetter(1))
        new_list = scores[-N:] if N > 0 else []

        # Merge the two ascending lists from their high ends, keeping the N
        # largest. new_list may hold fewer than N entries, so its index is
        # bounds-checked; best_so_far always holds N, and its index cannot
        # go negative before the loop ends.
        index1 = N - 1
        index2 = len(new_list) - 1
        merged = []
        for _ in range(N):
            if index2 >= 0 and best_so_far[index1][1] < new_list[index2][1]:
                merged.append(new_list[index2])
                index2 -= 1
            else:
                # On ties the entry already in best_so_far wins.
                merged.append(best_so_far[index1])
                index1 -= 1
        merged.reverse()
        best_so_far = merged

    return best_so_far

In [None]:
# Register the search function through the funcX client; the id that comes
# back is passed as function_id when the function is run.
func_uuid = fxc.register_function(
    process_one_target,
    description="Search for N closest smiles by fingerprint",
)
print(func_uuid)

In [None]:
# Inputs for a trial run: endpoint id, fingerprint files, query molecule and
# how many matches to keep. The paths are presumably resolved on the
# endpoint's filesystem rather than locally -- TODO confirm.
endpoint_uuid = '50215494-f309-445f-92b0-8a270fd1bcae'
test_files = [
    '/home/chard/fingerprint/f1.csv',
    '/home/chard/fingerprint/f2.csv',
    '/home/chard/fingerprint/f3.csv',
]
target_smile = 'O=C(Nc1cccc(c1)S(=O)(=O)N1CCCCC1)CN1Cc2c(C1)cccc2'
N = 20

# Submit the registered function with its three positional arguments; the
# returned value is what the next cell hands to fxc.get_result.
res = fxc.run(
    test_files,
    target_smile,
    N,
    endpoint_id=endpoint_uuid,
    function_id=func_uuid,
)
print(res)

In [None]:
# Fetch the outcome of the run submitted above. This cell may need to be
# re-run if the task has not finished yet -- TODO confirm against funcX docs.
r = fxc.get_result(res)
print(r)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook 

def plot_figure(label, target, results, N, count, save=False):
    """Draw a step plot of the scores in ``results``.

    Args:
        label: Short name of the data set; used in the title and file name.
        target: Query string shown in the title; a filtered prefix of it is
            used in the file name.
        results: Sequence of pairs whose second item (index 1) is the score
            to plot, in the order given.
        N: Number of matches, shown in the title only.
        count: Integer suffix that distinguishes output file names.
        save: When true, write the figure to
            ``fig_<label>__<target prefix>__<count>.pdf`` in the current
            directory and close it. Defaults to ``False`` (display only).

    Returns:
        None.
    """
    # Size this figure only; assigning plt.rcParams here would silently
    # change the default for every figure created afterwards.
    fig = plt.figure(figsize=(8, 6))
    plt.title('Top %d matches in %s for\n%s' % (N, label, target), fontsize=8)
    plt.ylim(0, 1)
    plt.xlabel('Match index')
    plt.ylabel('Score')

    scores_only = [x[1] for x in results]
    plt.step(np.arange(len(scores_only)), np.array(scores_only), linewidth=1)

    if save:
        # Keep only characters that are safe in a file name, and cap the
        # length so long SMILES strings do not produce unwieldy names.
        alphas = ''.join(
            c for c in target
            if c.isalpha() or c.isdigit() or c == '=' or c == '@'
        )
        figure_name = 'fig_%s__%s__%d.pdf' % (label, alphas[0:30], count)
        fig.savefig(figure_name)
        plt.close(fig)

In [None]:
# Plot the scores fetched into `r`; 'test' labels the data set and 10 is the
# numeric suffix used when building the figure file name.
plot_figure('test', target_smile, r, N, 10)