In [1]:
from protein_sequence import ProteinSequence
from mutation_strategy import MutationStrategy, MinLogitPosSub, BlosumWeightedSub
from evolution import Evolution
from model_singleton import ModelSingleton
import esm
from Bio.Align import substitution_matrices

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Source: https://github.com/facebookresearch/esm/tree/main?tab=readme-ov-file#esmfold
# NOTE(review): the link above points at the ESMFold section of the README, but the
# checkpoint loaded here is the ESM-2 650M language model, not ESMFold.
model, alphabet = esm.pretrained.load_model_and_alphabet("esm2_t33_650M_UR50D")
# Switch to inference mode so dropout is disabled and repeated runs over the same
# sequence yield the same logits (recommended in the ESM README quick-start).
model.eval()
# Converter supplied by the alphabet for turning sequences into model input batches.
batch_converter = alphabet.get_batch_converter()
# BLOSUM80 substitution matrix, passed to BlosumWeightedSub in a later cell.
blosum80 = substitution_matrices.load("BLOSUM80")

In [3]:
# Reference amino-acid sequence (one-letter codes); it is wrapped in a ProteinSequence
# and used as the root sequence of the evolution runs in the cells below.
# NOTE(review): the downstream variable name `fmd_sequence` suggests a specific protein
# of origin — confirm the provenance and record the source/accession here.
reference_seq = 'TTSAGESADPVTATVENYGGETQVQRRQHTDIAFILDRFVKVKPKEQVNVLDLMQIPAHTLVGALLRTATYYFSDLELAVKHEGDLTWVPNGAPETALDNTTNPTAYHKEPLTRLALPYTAPHRVLATVYNGSSKYGDTSTNNVRGDLQVLAQKAERTLPTSFNFGAIKATRVTELLYRMKRAETYCPRPLLAIQPSDARHKQRIVAPAKQ'

In [4]:
# Bundle the loaded model, its alphabet and the batch converter into the
# project's ModelSingleton.
# NOTE(review): presumably this registers the model for use by the mutation
# strategies — confirm in model_singleton.py.
modelSingleton = ModelSingleton(
    model=model,
    alphabet=alphabet,
    batch_converter=batch_converter,
)

In [5]:
# Run 1 setup: evolve from the reference sequence with the MinLogitPosSub strategy.
# (Alternative strategy: BlosumWeightedSub(blosum_matrix=blosum80, multiplier=0.5).)
fmd_sequence = ProteinSequence("base", reference_seq)
mutation_strat = MinLogitPosSub()
evolution = Evolution(
    root_sequence=fmd_sequence,
    mutation_strategy=mutation_strat,
    max_generations=6,
)

In [6]:
# Execute the evolution configured in the previous cell (MinLogitPosSub strategy).
# Progress is reported through printed messages, captured in the cell output below.
evolution.evolve_sequence() 

Invalid amino acid candidate for mutation as it is the same as the current amino acid: S>S
Using the second best fit amino acid for this position: S>K
Position mutated = 139 with amino acid K
Invalid amino acid candidate for mutation as it is the same as the current amino acid: N>N
Using the second best fit amino acid for this position: N>T
Position mutated = 141 with amino acid T
Invalid amino acid candidate for mutation as it is the same as the current amino acid: T>T
Using the second best fit amino acid for this position: T>V
Position mutated = 140 with amino acid V
Invalid amino acid candidate for mutation as it is the same as the current amino acid: T>T
Using the second best fit amino acid for this position: T>N
Position mutated = 138 with amino acid N
Invalid amino acid candidate for mutation as it is the same as the current amino acid: K>K
Using the second best fit amino acid for this position: K>Q
Position mutated = 139 with amino acid Q
Invalid amino acid candidate for mutatio

In [7]:
# Run 2 setup: same root sequence, but mutations are chosen by the
# BLOSUM80-weighted substitution strategy (multiplier 0.5).
# This rebinds `fmd_sequence`, `mutation_strat` and `evolution` from the first run.
fmd_sequence = ProteinSequence("base", reference_seq)
mutation_strat = BlosumWeightedSub(
    blosum_matrix=blosum80,
    multiplier=0.5,
)
evolution = Evolution(
    root_sequence=fmd_sequence,
    mutation_strategy=mutation_strat,
    max_generations=6,
)

In [8]:
# Execute the evolution configured in the previous cell (BlosumWeightedSub strategy).
# Progress is reported through printed messages, captured in the cell output below.
evolution.evolve_sequence()   

Invalid amino acid candidate for mutation as it is the same as the current amino acid: S>S
Using the second best fit amino acid for this position: S>A
Position mutated = 139 with amino acid A
Invalid amino acid candidate for mutation as it is the same as the current amino acid: A>A
Using the second best fit amino acid for this position: A>Q
Position mutated = 139 with amino acid Q
Invalid amino acid candidate for mutation as it is the same as the current amino acid: T>T
Using the second best fit amino acid for this position: T>V
Position mutated = 140 with amino acid V
Invalid amino acid candidate for mutation as it is the same as the current amino acid: T>T
Using the second best fit amino acid for this position: T>S
Position mutated = 138 with amino acid S
Invalid amino acid candidate for mutation as it is the same as the current amino acid: S>S
Using the second best fit amino acid for this position: S>A
Position mutated = 138 with amino acid A
Invalid amino acid candidate for mutatio