In [1]:
from Bio import SeqIO
from DisorderPredictors import VSL2b

In [2]:
# One VSL2b instance (imported from DisorderPredictors), created once for the
# notebook; it is the `predictor` argument in the predict() calls further down.
vsl2b = VSL2b()

In [3]:
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import pickle

def predict(seq_list, predictor, out_path, max_workers=2):
    """Run ``predictor`` over every item of ``seq_list`` and pickle the results.

    Each item is handed to ``predictor.run`` on a thread pool.  ``run`` must
    return a 3-tuple ``(ac, res, reg)`` where:

    * ``ac``  -- identifier of the item (printed when the prediction failed);
    * ``res`` -- ``None`` on failure, otherwise an iterable of 2-tuples of
      which only the second element is kept (presumably per-residue
      scores -- confirm against the predictor implementation);
    * ``reg`` -- an iterable of ``(start, end)`` pairs (presumably predicted
      regions -- confirm against the predictor implementation).

    Failed items are reported on stdout and left out of the output.

    Args:
        seq_list: Sized sequence of inputs accepted by ``predictor.run``.
        predictor: Object exposing ``run(item) -> (ac, res, reg)``.
        out_path: Path of the pickle file to (over)write.
        max_workers: Size of the thread pool.

    Returns:
        The list that was pickled: one ``(ac, values, regions)`` tuple per
        successful prediction, where ``regions`` holds
        ``(start, end, end - start)`` triples.
    """
    n = len(seq_list)
    results = []
    with ThreadPoolExecutor(max_workers) as executor:
        # Executor.map yields results in input order, so the output order
        # follows seq_list even though the work runs concurrently.
        predictions = executor.map(predictor.run, seq_list)
        for ac, res, reg in tqdm(predictions, total=n):
            if res is None:
                print("error: ", ac)
                continue  # skip items the predictor could not process

            values = [p for _, p in res]
            regions = [(s, e, e - s) for s, e in reg]  # precalculate length
            results.append((ac, values, regions))

    with open(out_path, "wb") as f:
        pickle.dump(results, f)

    # Hand the results back as well, so `result = predict(...)` is usable
    # without re-reading the pickle.
    return results


#result = predict(valid_proteins[:10], vsl2b, 'vsl2b_valid.pickle')

###  Run predictions

In [61]:
# Read the validation FASTA into a list of (record id, sequence string) pairs.
valid_proteins = [
    (record.id, str(record.seq))
    for record in SeqIO.parse("data/CAFA3_training_data/valid_proteins.fasta", "fasta")
]

# Prediction run is currently disabled; uncomment to regenerate the pickle.
#predict(valid_proteins, vsl2b, 'vsl2b_valid.pickle')
# Drop the list so the sequences do not stay in kernel memory.
del valid_proteins

In [59]:
# Read the random-model FASTA into a list of (record id, sequence string) pairs.
random_proteins = [
    (record.id, str(record.seq))
    for record in SeqIO.parse("data/CAFA3_training_data/random_model_proteins.fasta", "fasta")
]

# Prediction run is currently disabled; uncomment to regenerate the pickle.
#predict(random_proteins, vsl2b, 'vsl2b_random.pickle')
# Drop the list so the sequences do not stay in kernel memory.
del random_proteins

In [4]:
# Read the uniform-model FASTA into a list of (record id, sequence string) pairs.
uniform_proteins = [
    (record.id, str(record.seq))
    for record in SeqIO.parse("data/CAFA3_training_data/uniform_model_proteins.fasta", "fasta")
]

# Prediction run is currently disabled; uncomment to regenerate the pickle.
#predict(uniform_proteins, vsl2b, 'uniform_model.pickle')
# Drop the list so the sequences do not stay in kernel memory.
del uniform_proteins