In [1]:
!pip install git+https://github.com/evolutionaryscale/esm
!pip install py3Dmol

Collecting git+https://github.com/evolutionaryscale/esm
  Cloning https://github.com/evolutionaryscale/esm to c:\users\dinak\appdata\local\temp\pip-req-build-bt72tzkj
  Resolved https://github.com/evolutionaryscale/esm to commit 1561962f3ea465585c8ccf390722607b8b49f168
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/evolutionaryscale/esm 'C:\Users\dinak\AppData\Local\Temp\pip-req-build-bt72tzkj'




In [None]:
import py3Dmol
import numpy as np
import torch
import pandas as pd
from tqdm import tqdm
from esm.utils.structure.protein_chain import ProteinChain
from esm.models.esm3 import ESM3
from huggingface_hub import login
from esm.sdk import client
import requests
from bs4 import BeautifulSoup
from Bio.PDB import PDBParser, Superimposer, PDBIO, Structure
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from esm.sdk.api import (
    ESM3InferenceClient,
    ESMProtein,
    GenerationConfig,
)
# `login()` walks you through obtaining an access token from the Hugging Face
# Hub; create one with "Read" permission.
login()

# Run on the GPU when one is available and fall back to the CPU otherwise, so
# the notebook also works on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model: ESM3InferenceClient = ESM3.from_pretrained("esm3-open").to(DEVICE)

# Base URL of the domain pages; a domain id is appended to it when looking up
# the PDB entry of a domain.
url = "http://prodata.swmed.edu/ecod/af2_pdb/domain/"


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Fetching 22 files:   0%|          | 0/22 [00:00<?, ?it/s]

  state_dict = torch.load(


In [3]:
# Load the OMBB dataset. `df` is a notebook-level variable: later cells read
# its `id` column and add a `crmsd` column to it.
df = pd.read_csv('OMBB_data.csv')
# Show the first rows as a quick sanity check of the loaded columns.
df.head()

Unnamed: 0,id,strands,seq,seq_len
0,e1af6A1,18,VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKL...,421
1,e1kmoA2,22,IPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLY...,523
2,e1p4tA1,8,EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDY...,155
3,e1prnA1,16,EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVT...,289
4,e1qd5A1,12,AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDW...,257


In [72]:
def getPdbId(id, url, timeout=30):
    """Look up the PDB ID linked from a domain page.

    Fetches ``url + id``, parses the returned HTML and reads the ``href`` of
    the first ``<a title="Link to PDB">`` element. The PDB ID is the text that
    follows ``structureId=`` in that link.

    Args:
        id: Domain identifier appended to ``url``.
        url: Base URL of the domain pages.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The PDB ID as a string, or ``None`` when the request fails, the page
        has no PDB link, or the link carries no ``structureId=`` value. A
        message is printed in each of those cases.
    """
    marker = "structureId="
    try:
        # A timeout keeps a single unresponsive request from hanging the run.
        response = requests.get(url + id, timeout=timeout)
        # Treat HTTP error pages as failures instead of parsing them.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        link = soup.find('a', title="Link to PDB")
        href = link.get('href') if link else None
        pdb_id = None
        # Only accept the link when it really carries the marker; splitting a
        # link without it would hand back the whole URL as a bogus "ID".
        if href and marker in href:
            pdb_id = href.split(marker)[-1] or None
        if pdb_id is None:
            print(f'No PDB ID found for {id}')
        return pdb_id
    except Exception as e:
        # Best effort: report the problem and let the caller deal with None.
        print(f'Error: {e}')
        return None


In [5]:
import pickle
import os

def load_protein_chains(cache_path):
    """Return the protein chains for the dataset, using a pickle cache.

    If ``cache_path`` exists, the list stored in it is unpickled and returned.
    Otherwise every ``id`` of the notebook-level ``df`` is resolved to a PDB ID
    with ``getPdbId`` (using the notebook-level ``url``), the chain is fetched
    with ``ProteinChain.from_rcsb``, and the resulting list is pickled to
    ``cache_path``.

    Args:
        cache_path: Path of the pickle file used as the cache.

    Returns:
        list: The cached list, or a freshly fetched list with one chain per
        row of ``df`` in row order.

    Raises:
        ValueError: If no PDB ID can be resolved for a domain id. Nothing is
            written to the cache in that case.
    """
    if os.path.exists(cache_path):
        # NOTE: unpickling can execute arbitrary code; only point this at
        # cache files written by this notebook.
        with open(cache_path, 'rb') as file:
            protein_chains = pickle.load(file)
        print("Loaded list")
        return protein_chains

    protein_chains = []
    for domain_id in tqdm(df['id'], total=len(df), desc='Fetching ProteinChains'):
        pdb_id = getPdbId(domain_id, url)
        if pdb_id is None:
            # Fail with a clear message instead of handing None to from_rcsb.
            # Skipping the row instead would break the one-chain-per-row
            # correspondence with df.
            raise ValueError(f'Could not resolve a PDB ID for domain {domain_id!r}')
        protein_chains.append(ProteinChain.from_rcsb(pdb_id))

    with open(cache_path, 'wb') as file:
        pickle.dump(protein_chains, file)
    print("List saved successfully!")
    return protein_chains

In [6]:
def view_protein_chain(protein_chain):
    """Show a protein chain as a spectrum-coloured cartoon in a 500x500 py3Dmol viewer.

    Args:
        protein_chain: Object exposing ``to_pdb_string()``; the returned text
            is what gets loaded into the viewer.
    """
    viewer = py3Dmol.view(width=500, height=500)
    # The viewer is fed PDB-formatted text, so serialise the chain on the way in.
    viewer.addModel(protein_chain.to_pdb_string(), "pdb")
    cartoon_style = {"cartoon": {"color": "spectrum"}}
    viewer.setStyle(cartoon_style)
    # Frame the loaded model, then render it in the notebook.
    viewer.zoomTo()
    viewer.show()

In [51]:
def create_masked_protein_chain(protein_chain, mask_percent=0.1):
    """Build a sequence prompt in which the end of the sequence is masked.

    The first ``int(len(sequence) * (1 - mask_percent))`` residues are kept and
    every remaining position is replaced by ``'_'``. Because the kept length is
    truncated, the masked count is rounded up (e.g. 43 of 421 residues for the
    default 10%).

    Args:
        protein_chain: Object with a ``sequence`` string attribute.
        mask_percent: Fraction of the sequence to mask, between 0 and 1
            (``0.1`` means 10%).

    Returns:
        tuple: ``(sequence_prompt, mask_pos)`` where ``sequence_prompt`` has
        the same length as the original sequence and ``mask_pos`` is the index
        of the first masked position.

    Raises:
        ValueError: If ``mask_percent`` is outside ``[0, 1]``. Such values
            would otherwise yield a prompt whose length differs from the
            sequence.
    """
    if not 0 <= mask_percent <= 1:
        raise ValueError(f"mask_percent must be between 0 and 1, got {mask_percent!r}")

    sequence = protein_chain.sequence
    mask_pos = int(len(sequence) * (1 - mask_percent))
    mask_amount = len(sequence) - mask_pos
    print(f"Masking {mask_percent * 100}% ({mask_amount}) chars at the end of the sequence.")
    sequence_prompt = sequence[:mask_pos] + '_' * mask_amount
    print("Sequence prompt:", sequence_prompt)

    return sequence_prompt, mask_pos

In [8]:
def view_masked_protein_chain(mask_pos, pdb_str):
    """Show a structure in light grey with the unmasked prefix highlighted in cyan.

    Args:
        mask_pos: Number of leading positions to highlight (positions
            ``0 .. mask_pos - 1``).
        pdb_str: PDB-formatted text of the structure to display.
    """
    viewer = py3Dmol.view(width=500, height=500)
    viewer.addModel(pdb_str, "pdb")

    # Base style for the whole model.
    viewer.setStyle({"cartoon": {"color": "lightgrey"}})

    # Positions are 0-based here; they are shifted by one on the assumption
    # that residues in `pdb_str` are numbered from 1.
    highlighted_residues = (np.arange(0, mask_pos) + 1).tolist()
    viewer.addStyle({"resi": highlighted_residues}, {"cartoon": {"color": "cyan"}})

    viewer.zoomTo()
    viewer.show()

In [9]:
def predict(sequence_prompt):
    """Fill in the masked positions of a prompt, then predict its structure.

    Runs two generations with the notebook-level ``model``: first on the
    ``sequence`` track (temperature 0.5) to replace the ``'_'`` positions of
    ``sequence_prompt``, then on the ``structure`` track (temperature 0.7)
    using the completed sequence. Both the prompt and the generated sequence
    are printed.

    Args:
        sequence_prompt: Amino-acid string in which ``'_'`` marks the
            positions to generate.

    Returns:
        The ``ESMProtein`` returned by the structure-track generation.

    Raises:
        RuntimeError: If either generation returns something other than an
            ``ESMProtein`` (e.g. an error object instead of a result).
    """
    # Half as many steps as masked positions, and one step per 8 residues for
    # the structure. Both are clamped to at least 1, because the integer
    # division yields 0 for a single masked position or for prompts shorter
    # than 8 residues, which would request no decoding steps at all.
    sequence_steps = max(1, sequence_prompt.count("_") // 2)
    structure_steps = max(1, len(sequence_prompt) // 8)

    sequence_generation_config = GenerationConfig(
        track="sequence",
        num_steps=sequence_steps,
        temperature=0.5,
    )
    structure_prediction_config = GenerationConfig(
        track="structure",
        num_steps=structure_steps,
        temperature=0.7,
    )

    # Step 1: decode the masked part of the sequence.
    protein = ESMProtein(sequence=sequence_prompt)
    sequence_generation = model.generate(protein, sequence_generation_config)
    if not isinstance(sequence_generation, ESMProtein):
        raise RuntimeError(f"Sequence generation failed: {sequence_generation}")
    print("Sequence Prompt:\n\t", protein.sequence)
    print("Generated sequence:\n\t", sequence_generation.sequence)

    # Step 2: predict the structure of the completed sequence, starting from a
    # fresh protein that carries only the sequence.
    structure_prediction_prompt = ESMProtein(sequence=sequence_generation.sequence)
    structure_prediction = model.generate(
        structure_prediction_prompt, structure_prediction_config
    )
    if not isinstance(structure_prediction, ESMProtein):
        raise RuntimeError(f"Structure prediction failed: {structure_prediction}")

    return structure_prediction

In [44]:
def view_aligned_structures(pdb1, pdb2, inds):
    """Display two structures together in one 1000x500 py3Dmol viewer.

    The first structure is drawn in light grey and the second in light green.
    The residues ``inds + 1`` are then additionally styled in cyan; that
    selection does not name a model.

    Args:
        pdb1: PDB-formatted text of the first structure (model 0).
        pdb2: PDB-formatted text of the second structure (model 1).
        inds: Array of 0-based positions to highlight; it must support
            ``inds + 1`` and ``.tolist()`` (e.g. a NumPy array).
    """
    viewer = py3Dmol.view(width=1000, height=500)

    # Models are numbered in the order they are added.
    for pdb_text in (pdb1, pdb2):
        viewer.addModel(pdb_text, "pdb")
    for model_index, color in enumerate(("lightgrey", "lightgreen")):
        viewer.setStyle({'model': model_index}, {"cartoon": {"color": color}})

    highlighted_residues = (inds + 1).tolist()
    viewer.addStyle({"resi": highlighted_residues}, {"cartoon": {"color": "cyan"}})

    viewer.zoomTo()
    viewer.show()

In [75]:
cache_path = 'protein_chains.pkl'

# Load protein data
protein_chains = load_protein_chains(cache_path)

# Results are stored back into `df` row by row, so the chains must line up
# with the rows. Check this before the expensive loop instead of failing on
# the column assignment after everything has been computed.
if len(protein_chains) != len(df):
    raise ValueError(
        f"Expected one protein chain per dataset row, got {len(protein_chains)} "
        f"chains for {len(df)} rows (is the cache at {cache_path!r} stale?)"
    )

rmsd_results = []
run_completed = False

try:
    for protein_chain in protein_chains:
        print("Protein sequence length: {}".format(len(protein_chain)))
        print("Sequence: {}".format(protein_chain.sequence))

        # View loaded protein chain
        #print("Loaded protein:")
        #view_protein_chain(protein_chain)

        # Create masked protein chain (default mask - 10%)
        masked_protein, mask_pos = create_masked_protein_chain(protein_chain)

        # View masked protein
        #print("Masked protein:")
        #pdb_str = protein_chain.to_pdb_string()
        #view_masked_protein_chain(mask_pos, pdb_str)

        # Use ESM3 to predict protein structure of the masked protein
        structure_prediction = predict(masked_protein)

        # Convert the structure prediction to a ProteinChain object
        structure_prediction_chain = structure_prediction.to_protein_chain()

        # Align the generated structure with the original structure using the
        # non-masked positions (0 .. mask_pos - 1) of both chains
        inds = np.arange(0, mask_pos)
        aligned_chain = structure_prediction_chain.align(
            protein_chain, mobile_inds=inds, target_inds=inds)

        # View aligned structures
        pdb1 = aligned_chain.to_pdb_string()
        pdb2 = protein_chain.to_pdb_string()
        view_aligned_structures(pdb1, pdb2, inds)

        # Calculate RMSD on the masked part (positions mask_pos .. end)
        masked_inds = np.arange(mask_pos, len(protein_chain.sequence))
        crmsd_masked = aligned_chain.rmsd(protein_chain, mobile_inds=masked_inds, target_inds=masked_inds)
        print("cRMSD of the motif in the generated structure vs the original structure (masked part): ", crmsd_masked)

        # Collect the cRMSD; it is written to the data frame below
        rmsd_results.append(crmsd_masked)

    run_completed = True
finally:
    # Keep whatever was computed even when the loop is interrupted or fails:
    # rows that were not reached get NaN. A partial run is written to a
    # separate file so it never overwrites the results of a complete run.
    df['crmsd'] = pd.Series(
        rmsd_results, index=df.index[:len(rmsd_results)], dtype=float)
    df.to_csv(
        'OMBB_data_crmsd.csv' if run_completed else 'OMBB_data_crmsd.partial.csv',
        index=False)







Loaded list
Protein sequence length: 421
Sequence: VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGYDYTGNADNNANFGKAVPADFNGGSFGRGDSDEWTFGAQMEIWW
Masking 10.0% (43) chars at the end of the sequence.
Sequence prompt: VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGY___________________________________________


100%|██████████| 21/21 [00:12<00:00,  1.69it/s]


Sequence Prompt:
	 VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGY___________________________________________
Generated sequence:
	 VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGYGGGGGYETSYNSASNLTTFQGGTAAPSDKDSAFSFGVQAEAWW


100%|██████████| 52/52 [00:31<00:00,  1.67it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  5.354820767550806
Protein sequence length: 661
Sequence: ALTVVGDWLGDARENDVFEHAGARDVIRREDFAKTGATTMREVLNRIPGVSAPENNGTGSHDLAMNFGIRGLNPRLASRSTVLMDGIPVPFAPYGQPQLSLAPVSLGNMDAIDVVRGGGAVRYGPQSVGGVVNFVTRAIPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLYSGTRGSDWREHSATRIDDLMLKSKYAPDEVHTFNSLLQYYDGEADMPGGLSRADYDADRWQSTRPYDRFWGRRKLASLGYQFQPDSQHKFNIQGFYTQTLRSGYLEQGKRITLSPRNYWVRGIEPRYSQIFMIGPSAHEVGVGYRYLNESTHEMRYYTATSSGQLPSGSSPYDRDTRSGTEAHAWYLDDKIDIGNWTITPGMRFEHIESYQNNAITGTHEEVSYNAPLPALNVLYHLTDSWNLYANTEGSFGTVQYSQIGKAVQSGNVEPEKARTWELGTRYDDGALTAEMGLFLINFNNQYDSNQTNDTVTARGKTRHTGLETQARYDLGTLTPTLDNVSIYASYAYVNAEIREKGDTYGNLVPFSPKHKGTLGVDYKPGNWTFNLNSDFQSSQFADNANTVKESADGSTGRIPGFMLWGARVAYDFGPQMADLNLAFGVKNIFDQDYFIRSYDDNNKGIYAGQPRTLYMQGSLKF
Masking 10.0% (67) chars at the end of the sequence.
Sequence prompt: ALTVVGDWLGDARENDVFEHAGARDVIRREDFAKTGATTMREVLNRIPGVSAPENNGTGSHDLAMNFGIRGLNPRLASRSTVLMDGIPVPFAPYGQPQLSLAPVSLGNMDAIDVVRGGGAVRY

100%|██████████| 33/33 [00:34<00:00,  1.04s/it]


Sequence Prompt:
	 ALTVVGDWLGDARENDVFEHAGARDVIRREDFAKTGATTMREVLNRIPGVSAPENNGTGSHDLAMNFGIRGLNPRLASRSTVLMDGIPVPFAPYGQPQLSLAPVSLGNMDAIDVVRGGGAVRYGPQSVGGVVNFVTRAIPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLYSGTRGSDWREHSATRIDDLMLKSKYAPDEVHTFNSLLQYYDGEADMPGGLSRADYDADRWQSTRPYDRFWGRRKLASLGYQFQPDSQHKFNIQGFYTQTLRSGYLEQGKRITLSPRNYWVRGIEPRYSQIFMIGPSAHEVGVGYRYLNESTHEMRYYTATSSGQLPSGSSPYDRDTRSGTEAHAWYLDDKIDIGNWTITPGMRFEHIESYQNNAITGTHEEVSYNAPLPALNVLYHLTDSWNLYANTEGSFGTVQYSQIGKAVQSGNVEPEKARTWELGTRYDDGALTAEMGLFLINFNNQYDSNQTNDTVTARGKTRHTGLETQARYDLGTLTPTLDNVSIYASYAYVNAEIREKGDTYGNLVPFSPKHKGTLGVDYKPGNWTFNLNSDFQSSQFADNANTVKESADGS___________________________________________________________________
Generated sequence:
	 ALTVVGDWLGDARENDVFEHAGARDVIRREDFAKTGATTMREVLNRIPGVSAPENNGTGSHDLAMNFGIRGLNPRLASRSTVLMDGIPVPFAPYGQPQLSLAPVSLGNMDAIDVVRGGGAVRYGPQSVGGVVNFVTRAIPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLYSGTRGSDWREHSATRIDDLMLKSKYAPDEVHTFNSLLQYYDGEADMPGGLSRADYDADRWQSTRPYDRFWGRRKLASLGYQFQPDSQHKFNIQGFYTQTLRSGYLEQGKRITL

100%|██████████| 82/82 [01:25<00:00,  1.04s/it]


cRMSD of the motif in the generated structure vs the original structure (masked part):  2.3028502721516575
Protein sequence length: 155
Sequence: EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTVKNVRSGELSAGVRVKF
Masking 10.0% (16) chars at the end of the sequence.
Sequence prompt: EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTV________________


100%|██████████| 8/8 [00:01<00:00,  4.21it/s]


Sequence Prompt:
	 EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTV________________
Generated sequence:
	 EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTVGKTTVHEVRLGARYSF


100%|██████████| 19/19 [00:04<00:00,  4.21it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  0.32062552274582484
Protein sequence length: 289
Sequence: EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQFAEGVKVSGSVQSGFANETVADVGVRFDF
Masking 10.0% (29) chars at the end of the sequence.
Sequence prompt: EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQ_____________________________


100%|██████████| 14/14 [00:05<00:00,  2.46it/s]


Sequence Prompt:
	 EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQ_____________________________
Generated sequence:
	 EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQLNSNATLYAGYGEDILDREVVEAGVRFNF


100%|██████████| 36/36 [00:14<00:00,  2.45it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  0.5360496147407665
Protein sequence length: 257
Sequence: AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYSGYGESLIDYNFNQTRVGVGVMLNDLF
Masking 10.0% (26) chars at the end of the sequence.
Sequence prompt: AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYS__________________________


100%|██████████| 13/13 [00:05<00:00,  2.56it/s]


Sequence Prompt:
	 AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYS__________________________
Generated sequence:
	 AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYSGYGESLLDYNHRQTTYGVGLVLTDWF


100%|██████████| 32/32 [00:12<00:00,  2.55it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  0.3756783559401987
Protein sequence length: 148
Sequence: ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIRSVDVGTWIAGVGYRF
Masking 10.0% (15) chars at the end of the sequence.
Sequence prompt: ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIR_______________


100%|██████████| 7/7 [00:01<00:00,  4.26it/s]


Sequence Prompt:
	 ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIR_______________
Generated sequence:
	 ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIRDVDVDTWRAGVGYRF


100%|██████████| 18/18 [00:04<00:00,  4.21it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  0.2918332534970484
Protein sequence length: 137
Sequence: APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWTNGMLSLGVSYRFG
Masking 10.0% (14) chars at the end of the sequence.
Sequence prompt: APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQW______________


100%|██████████| 7/7 [00:01<00:00,  4.34it/s]


Sequence Prompt:
	 APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQW______________
Generated sequence:
	 APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWYNLSDGIVNMTNLG


100%|██████████| 17/17 [00:03<00:00,  4.34it/s]


cRMSD of the motif in the generated structure vs the original structure (masked part):  3.4545426498030936
Protein sequence length: 427
Sequence: AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQNRSISIPDQDRFWLSAGTTYAFNKDASVDVGVSYMHGQSVKINEGPYQFESEGKAWLFGTNFNYAFHHHHHH
Masking 10.0% (43) chars at the end of the sequence.
Sequence prompt: AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQN

100%|██████████| 21/21 [00:13<00:00,  1.60it/s]


Sequence Prompt:
	 AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQNRSISIPDQDRFWLSAGTTYAFNKDASVD___________________________________________
Generated sequence:
	 AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQNRSISIPDQDRFWLSAGTTYAFNKDASVDFAYSHLFVKDAKVDQNYVAGGGKVTGTYKGHVDMFGLQFNYRF


  8%|▊         | 4/53 [00:02<00:31,  1.54it/s]


KeyboardInterrupt: 