In [1]:
from protein_sequence import ProteinSequence
from mutation_strategy import MutationStrategy, MinLogitPosSub, BlosumWeightedSub
from evolution import Evolution
from evaluation_strategy import EvaluationStrategy
from model_singleton import ModelSingleton
import esm
from Bio.Align import substitution_matrices

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Source: https://github.com/facebookresearch/esm/tree/main?tab=readme-ov-file#esmfold
# Note: the checkpoint loaded here is the ESM-2 650M language model
# ("esm2_t33_650M_UR50D"), not ESMFold itself.
model, alphabet = esm.pretrained.load_model_and_alphabet("esm2_t33_650M_UR50D")

# Put the model in evaluation mode before any inference, as the ESM README
# quick-start does, so that any dropout layers are disabled and repeated
# scoring of the same sequence is deterministic. Calling it again elsewhere
# is harmless.
model.eval()

# Converts (label, sequence) pairs into the token batches the model consumes.
batch_converter = alphabet.get_batch_converter()

# BLOSUM80 substitution matrix; only referenced by the (currently disabled)
# BlosumWeightedSub strategy in the visible cells.
blosum80 = substitution_matrices.load("BLOSUM80")

In [3]:
# Reference amino-acid sequence (one-letter codes) that seeds the evolution run;
# it is wrapped in a ProteinSequence and used as the root sequence further down.
reference_seq = "TTSAGESADPVTATVENYGGETQVQRRQHTDIAFILDRFVKVKPKEQVNVLDLMQIPAHTLVGALLRTATYYFSDLELAVKHEGDLTWVPNGAPETALDNTTNPTAYHKEPLTRLALPYTAPHRVLATVYNGSSKYGDTSTNNVRGDLQVLAQKAERTLPTSFNFGAIKATRVTELLYRMKRAETYCPRPLLAIQPSDARHKQRIVAPAKQ"

In [4]:
# Bundle the loaded model, its alphabet and the batch converter in a ModelSingleton.
# NOTE(review): `modelSingleton` is not referenced again in the visible cells;
# presumably constructing it registers a shared instance that the mutation and
# evaluation strategies look up internally. Confirm against model_singleton.py.
modelSingleton = ModelSingleton(
    model=model,
    alphabet=alphabet,
    batch_converter=batch_converter,
)

In [5]:
# Root of the run: the reference sequence wrapped as a ProteinSequence.
# ("base" is the first positional argument; presumably an identifier. Confirm
# against ProteinSequence.)
fmd_sequence = ProteinSequence("base", reference_seq)

# Evaluation strategy constructed with its defaults.
eval_strat = EvaluationStrategy()

# Mutation strategy for this run.
# Alternative: BlosumWeightedSub(blosum_matrix=blosum80, multiplier=0.5)
mutation_strat = MinLogitPosSub(mutations_per_seq=2)

# Wire the root sequence and both strategies into the evolution driver.
evolution = Evolution(
    root_sequence=fmd_sequence,
    mutation_strategy=mutation_strat,
    evaluation_strategy=eval_strat,
    max_generations=6,
)

In [6]:
# Run the evolution. The recorded output shows, per step, the candidate
# positions ("Potential mutations"), the substitution applied, the parent's
# mutation, and a "Mutation Score".
# NOTE(review): in the recorded output Gen 0-5 all report the same substitution
# (pos 139, aa S) with an identical score (0.9047...). Verify that the mutation
# strategy explores new candidates instead of re-proposing the parent's mutation.
evolution.evolve_sequence() 

Potential mutations: [ 4 11]
Gen 0: current seq muatted at pos 139 with aa S, parent seq has mutation None
Mutation Score: 0.9047226905822754
Potential mutations: [ 4 11]
Gen 1: current seq muatted at pos 139 with aa S, parent seq has mutation 139S
Mutation Score: 0.9047226905822754
Potential mutations: [ 4 11]
Gen 2: current seq muatted at pos 139 with aa S, parent seq has mutation 139S
Mutation Score: 0.9047226905822754
Potential mutations: [ 4 11]
Gen 3: current seq muatted at pos 139 with aa S, parent seq has mutation 139S
Mutation Score: 0.9047226905822754
Potential mutations: [ 4 11]
Gen 4: current seq muatted at pos 139 with aa S, parent seq has mutation 139S
Mutation Score: 0.9047226905822754
Potential mutations: [ 4 11]
Gen 5: current seq muatted at pos 139 with aa S, parent seq has mutation 139S
Mutation Score: 0.9047226905822754
Gen 5: current seq muatted at pos 139 with aa K, parent seq has mutation 139S
Mutation Score: 0.8034502863883972
Gen 4: current seq muatted at pos 1

In [7]:
# fmd_sequence = ProteinSequence("base",reference_seq)
# TODO: BlosumWeightedSub must be updated to work with multiple potential mutations.
# mutation_strat = BlosumWeightedSub(blosum_matrix=blosum80,multiplier=0.5)
# evolution = Evolution(
#     root_sequence = fmd_sequence,
#     mutation_strategy = mutation_strat,
#     evaluation_strategy = eval_strat,
#     max_generations=6)

In [8]:
# evolution.evolve_sequence()   