In [3]:
# !pip install git+https://github.com/evolutionaryscale/esm
# !pip install py3Dmol

In [4]:
import py3Dmol
import numpy as np
import torch
import pandas as pd
from tqdm import tqdm
import pickle
import os
from esm.utils.structure.protein_chain import ProteinChain
from esm.models.esm3 import ESM3
from huggingface_hub import login
from esm.sdk import client
import requests
from bs4 import BeautifulSoup
from Bio.PDB import PDBParser, Superimposer, PDBIO, Structure
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import wandb
import tempfile
import plotly.graph_objects as go
from pathlib import Path
from urllib.parse import urljoin
from esm.sdk.api import (
    ESM3InferenceClient,
    ESMProtein,
    GenerationConfig,
    SamplingConfig
)
import umap
# Authenticate with the Hugging Face Hub. login() walks you through supplying an
# access token; create one with "Read" permission.
login()
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model: ESM3InferenceClient = ESM3.from_pretrained("esm3-open").to(device)
# Base URL of the per-domain pages; a domain id is appended to it to form the page URL.
url = "http://prodata.swmed.edu/ecod/af2_pdb/domain/"


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Load the dataset table. The preview shows the columns id, strands, seq and seq_len,
# plus an "Unnamed: 0" column (presumably an index saved with the CSV — confirm).
df = pd.read_csv('OMBB_Small.csv')
df.head()

Unnamed: 0,id,strands,seq,seq_len
0,e1af6A1,18,VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKL...,421
1,e1kmoA2,22,IPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLY...,523
2,e1p4tA1,8,EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDY...,155
3,e1prnA1,16,EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVT...,289
4,e1qd5A1,12,AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDW...,257


In [6]:
def download_pdb_file(id, base_url, output_filename, timeout=30):
    """Download the file behind the "Coordinates" link of a domain page.

    The page at ``base_url + id`` is fetched and parsed; the first ``<a>`` element
    whose text is exactly "Coordinates" is followed (relative links are resolved
    against ``base_url``) and its content is written to ``output_filename``.

    Args:
        id: Domain identifier appended to ``base_url``.
        base_url: Base URL of the domain pages.
        output_filename: Destination path; its parent directory is created if missing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``; any exception is
        caught and printed rather than raised (best-effort download).
    """
    try:
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(base_url + id, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the "Coordinates" link on the page
        link = soup.find('a', string="Coordinates")
        if link is None:
            print(f'No Coordinates link found for {id}')
            return

        # Handle relative URL by combining with the base URL
        href = urljoin(base_url, link['href'])

        coord_response = requests.get(href, timeout=timeout)
        coord_response.raise_for_status()  # Raise HTTPError for bad responses

        # Make sure the destination directory exists before writing into it.
        parent_dir = os.path.dirname(output_filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_filename, 'wb') as file:
            file.write(coord_response.content)
        print(f'Coordinates file downloaded successfully as {output_filename}')
    except Exception as e:
        print(f'Error: {e}')



In [7]:
def getPdbId(id, url, timeout=30):
    """Look up the PDB id linked from a domain page.

    The page at ``url + id`` is fetched and searched for an ``<a>`` element with
    ``title="Link to PDB"``; the PDB id is the part of its ``href`` that follows
    ``structureId=``.

    Args:
        id: Domain identifier appended to ``url``.
        url: Base URL of the domain pages.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The PDB id as a string, or None when the page cannot be fetched, has no
        such link, or the link carries no ``structureId=`` parameter.
    """
    marker = "structureId="
    try:
        response = requests.get(url + id, timeout=timeout)
        # Fail on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        pdb_id = None
        link = soup.find('a', title="Link to PDB")
        if link:
            href = link['href']
            # Only accept hrefs that contain the marker; otherwise split() would
            # hand back the whole href as if it were an id.
            if marker in href:
                pdb_id = href.split(marker)[-1]
        if pdb_id is None:
            print(f'No PDB ID found for {id}')
        return pdb_id
    except Exception as e:
        print(f'Error: {e}')
        return None

In [8]:

# error in files e2wjrA1, e3bryA1, e3qq2A1, e3sy7A2, e3szvA1, e4afkA1, e4c00A4, e4cu4A2, e4fqeA1, e4frxA1, e4fspA1, e4q35A2, e4rdrA2, e4rjwA1, e5dl5A1, e5fokA1, e5fp1A1, e5fq8B2, e5fr8A2, e5fvnA1,
#e5ldvA1, e5m9bA1, e5mdoA1, e5o65A1, e5t3rD1, e6e4vA1, e6ehbA1, e6ehdA1, e6fokA1, e6gieA1, e6i96A1, e6r2qB1, e6sljA1, e6ucuA1, e6v81A2 

def load_protein_chains(cache_path):
    """Return one ProteinChain per row of the global ``df``, using a pickle cache.

    When ``cache_path`` exists the list is unpickled from it and returned. Otherwise
    each row's PDB file is read from ``pdb_files/<id>.pdb`` (downloaded first via
    ``download_pdb_file`` when missing) and parsed with ``ProteinChain.from_pdb``.
    If parsing raises ValueError, the PDB id is looked up with ``getPdbId`` and the
    chain is loaded with ``ProteinChain.from_rcsb`` instead. The finished list is
    pickled to ``cache_path``.

    Args:
        cache_path: Path of the pickle cache file.

    Returns:
        List of chains, in the same order as the rows of ``df``.

    Raises:
        RuntimeError: when a chain cannot be loaded. The result must stay aligned
            with the rows of ``df``, so a failed row is reported instead of being
            skipped or filled with another row's chain.
    """
    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code; only load cache files that
        # this notebook wrote itself.
        with open(cache_path, 'rb') as file:
            protein_chains = pickle.load(file)
        print("Loaded list")
        return protein_chains

    # The download target directory has to exist before files are written into it.
    os.makedirs("pdb_files", exist_ok=True)

    protein_chains = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc='Fetching ProteinChains'):
        domain_id = row['id']
        path = f"pdb_files/{domain_id}.pdb"
        if not os.path.exists(path):
            download_pdb_file(domain_id, url, path)
        try:
            chain = ProteinChain.from_pdb(path)
        except ValueError as e:
            print(f"ValueError while processing {domain_id} at {path}: {e}")
            # Attempt to fetch the PDB from an alternative source if ValueError occurs
            pdb_id = getPdbId(domain_id, url)
            if pdb_id is None:
                raise RuntimeError(
                    f"Could not load {domain_id}: parsing {path} failed and no PDB id was found"
                ) from e
            chain = ProteinChain.from_rcsb(pdb_id)
        except Exception as e:
            print(f"MAX ERROR!!!!. Error while processing {domain_id} at {path}: {e}")
            # Previously execution fell through and appended whatever chain the
            # previous row had produced (or hit an unbound variable on the first row).
            raise RuntimeError(f"Could not load {domain_id} from {path}") from e
        protein_chains.append(chain)

    with open(cache_path, 'wb') as file:
        pickle.dump(protein_chains, file)
    print("List saved successfully!")

    return protein_chains

In [9]:
def log_py3Dmol_to_wandb(view, pdb_id):
    """Log a py3Dmol view to W&B as an HTML panel.

    The view is written as a full HTML page to a scratch file, read back as text
    and logged with ``wandb.log`` under the key ``pdb_id``.

    Args:
        view: Object providing ``write_html(file_handle, fullpage=True)``.
        pdb_id: Key under which the HTML is logged.
    """
    # A private temporary directory gives every call its own scratch file (no
    # clashes on a fixed name in the working directory) and is removed even when
    # writing or reading fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        html_path = Path(tmp_dir) / "protein_view.html"

        # Write HTML content using a file handle
        with html_path.open('w', encoding='utf-8') as f:
            view.write_html(f, fullpage=True)

        html_content = html_path.read_text(encoding='utf-8')

    # Log to W&B as HTML
    wandb.log({
        pdb_id: wandb.Html(html_content)
    })

In [10]:
def view_protein_chain(protein_chain, id):
    """Build a spectrum-coloured cartoon view of a chain and log it to W&B.

    Args:
        protein_chain: Object providing ``to_pdb_string()``.
        id: Key under which the view is logged via ``log_py3Dmol_to_wandb``.
    """
    # py3Dmol takes coordinates as PDB text, so serialise the chain first.
    pdb_text = protein_chain.to_pdb_string()

    viewer = py3Dmol.view(width=500, height=500)
    viewer.addModel(pdb_text, "pdb")
    viewer.setStyle({"cartoon": {"color": "spectrum"}})
    viewer.zoomTo()

    # The view is logged rather than shown inline.
    log_py3Dmol_to_wandb(viewer, id)
  

In [11]:
def create_masked_protein_chain(protein_chain, mask_percent=0.1):
    """Replace the tail of a chain's sequence with '_' mask characters.

    Args:
        protein_chain: Object exposing a ``sequence`` string.
        mask_percent: Fraction of the sequence, counted from the end, to mask.

    Returns:
        Tuple ``(sequence_prompt, mask_pos)``: the sequence with every position
        from ``mask_pos`` onwards replaced by '_', and the index of the first
        masked position.
    """
    sequence = protein_chain.sequence
    total_length = len(sequence)

    # Index of the first masked residue; everything before it is kept verbatim.
    mask_pos = int(total_length * (1 - mask_percent))
    masked_count = total_length - mask_pos

    sequence_prompt = sequence[:mask_pos] + '_' * masked_count
    return sequence_prompt, mask_pos

In [12]:
def view_masked_protein_chain(mask_pos, pdb_str, id):
    """Log a view of a structure with the first ``mask_pos`` residues highlighted.

    The whole model is drawn as a light-grey cartoon and residues 1..mask_pos are
    additionally styled cyan.

    Args:
        mask_pos: Number of leading positions to highlight.
        pdb_str: Structure as PDB text.
        id: Key under which the view is logged via ``log_py3Dmol_to_wandb``.
    """
    viewer = py3Dmol.view(width=500, height=500)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle({"cartoon": {"color": "lightgrey"}})

    # Residue indices are 1-indexed in PDB files, so positions 0..mask_pos-1
    # correspond to residue numbers 1..mask_pos.
    highlighted_residues = np.arange(1, mask_pos + 1).tolist()
    viewer.addStyle({"resi": highlighted_residues}, {"cartoon": {"color": "cyan"}})

    viewer.zoomTo()
    log_py3Dmol_to_wandb(viewer, id)

In [13]:
def predict(sequence_prompt, sequence_generation_config, structure_prediction_config):
    """Complete a masked sequence, predict its structure and collect embeddings.

    Uses the module-level ``model``. The prompt is first passed to
    ``model.generate`` with the sequence config, then the generated sequence is
    passed to ``model.generate`` with the structure config. Per-residue embeddings
    are collected after each generation step.

    Args:
        sequence_prompt: Sequence string used to build the prompt protein.
        sequence_generation_config: Config passed to the first ``model.generate`` call.
        structure_prediction_config: Config passed to the second ``model.generate`` call.

    Returns:
        Tuple ``(structure_prediction, embeddings_sequence_generation,
        embeddings_structure_prediction)``.
    """
    def _per_residue_embedding(esm_protein):
        # Encode the protein and run one forward pass asking for per-residue embeddings.
        encoded = model.encode(esm_protein)
        sampled = model.forward_and_sample(
            encoded, SamplingConfig(return_per_residue_embeddings=True)
        )
        return sampled.per_residue_embedding

    # Step 1: complete the sequence.
    prompt_protein = ESMProtein(sequence=sequence_prompt)
    generated = model.generate(prompt_protein, sequence_generation_config)
    print("Sequence Prompt:\n\t", prompt_protein.sequence)
    print("Generated sequence:\n\t", generated.sequence)
    # NOTE(review): this embedding is computed from the prompt protein object, not
    # from `generated` — confirm that is the intended input.
    embeddings_sequence_generation = _per_residue_embedding(prompt_protein)

    # Step 2: predict a structure for the generated sequence.
    structure_prediction_prompt = ESMProtein(sequence=generated.sequence)
    structure_prediction = model.generate(
        structure_prediction_prompt, structure_prediction_config
    )
    # NOTE(review): this embedding is computed from the sequence-only prompt, not
    # from `structure_prediction` — confirm that is the intended input.
    embeddings_structure_prediction = _per_residue_embedding(structure_prediction_prompt)

    del structure_prediction_prompt
    torch.cuda.empty_cache()
    return structure_prediction, embeddings_sequence_generation, embeddings_structure_prediction

In [14]:
def view_aligned_structures(pdb1, pdb2, inds, id):
    """Log a single view containing two structures loaded as separate models.

    Model 0 (``pdb1``) is drawn light grey and model 1 (``pdb2``) light green;
    residues ``inds + 1`` are additionally styled cyan.

    Args:
        pdb1: First structure as PDB text.
        pdb2: Second structure as PDB text.
        inds: Array of 0-based positions to highlight (must support ``+ 1`` and
            ``.tolist()``).
        id: Key under which the view is logged via ``log_py3Dmol_to_wandb``.
    """
    viewer = py3Dmol.view(width=1000, height=500)

    # Load both structures and give each model its own base colour.
    for pdb_text in (pdb1, pdb2):
        viewer.addModel(pdb_text, "pdb")
    for model_index, colour in ((0, "lightgrey"), (1, "lightgreen")):
        viewer.setStyle({'model': model_index}, {"cartoon": {"color": colour}})

    # The highlight selection names no model (presumably matching those residues
    # in both models — confirm).
    highlighted_residues = (inds + 1).tolist()
    viewer.addStyle({"resi": highlighted_residues}, {"cartoon": {"color": "cyan"}})

    viewer.zoomTo()
    log_py3Dmol_to_wandb(viewer, id)
  

In [15]:
def view_side_by_side_structures(pdb1, pdb2, motifInds, generatedInds, id):
    """Log a 1x2 viewer grid with one structure per panel.

    Left panel: ``pdb1`` in light grey with residues ``motifInds + 1`` in cyan.
    Right panel: ``pdb2`` in cyan with residues ``generatedInds + 1`` in light green.

    Args:
        pdb1: Structure shown in the left panel, as PDB text.
        pdb2: Structure shown in the right panel, as PDB text.
        motifInds: Array of 0-based positions highlighted in the left panel.
        generatedInds: Array of 0-based positions highlighted in the right panel.
        id: Key under which the view is logged via ``log_py3Dmol_to_wandb``.
    """
    left_panel, right_panel = (0, 0), (0, 1)
    viewer = py3Dmol.view(width=1000, height=500, viewergrid=(1, 2))

    # Left panel
    viewer.addModel(pdb1, "pdb", viewer=left_panel)
    viewer.setStyle({"cartoon": {"color": "lightgrey"}}, viewer=left_panel)
    motif_residues = (motifInds + 1).tolist()
    viewer.addStyle({"resi": motif_residues}, {"cartoon": {"color": "cyan"}}, viewer=left_panel)

    # Right panel
    viewer.addModel(pdb2, "pdb", viewer=right_panel)
    viewer.setStyle({"cartoon": {"color": "cyan"}}, viewer=right_panel)
    generated_residues = (generatedInds + 1).tolist()
    viewer.addStyle({"resi": generated_residues}, {"cartoon": {"color": "lightgreen"}}, viewer=right_panel)

    viewer.zoomTo()
    log_py3Dmol_to_wandb(viewer, id)


In [16]:
def getGenerationConfigs(sequence_prompt):
    """Build the sequence- and structure-track generation configs for a prompt.

    The sequence track uses one step per four masked ('_') positions and the
    structure track one step per ten residues. Both counts are clamped to at least
    1, because the integer division alone yields 0 steps for prompts with fewer
    than 4 masked positions or fewer than 10 residues.

    Args:
        sequence_prompt: Sequence string in which masked positions are '_'.

    Returns:
        Tuple ``(sequence_generation_config, structure_prediction_config)``.
    """
    masked_positions = sequence_prompt.count("_")
    sequence_steps = max(1, masked_positions // 4)
    structure_steps = max(1, len(sequence_prompt) // 10)

    sequence_generation_config = GenerationConfig(
        track="sequence",
        num_steps=sequence_steps,
        temperature=0.5,
        top_p = 1,
        schedule='cosine'
    )
    structure_prediction_config = GenerationConfig(
        track="structure",
        num_steps=structure_steps,
        temperature=0.7,
        top_p = 1,
        schedule='cosine'
    )
    return sequence_generation_config, structure_prediction_config

In [38]:
def log_summary_statistics(rmsd_results):
    """Write mean, std, min, max and median of the RMSD values to the W&B run summary.

    Args:
        rmsd_results: Sequence of RMSD values.
    """
    summary_stats = {
        "mean_rmsd": np.mean(rmsd_results),
        "std_rmsd": np.std(rmsd_results),
        "min_rmsd": np.min(rmsd_results),
        "max_rmsd": np.max(rmsd_results),
        "median_rmsd": np.median(rmsd_results),
    }
    wandb.run.summary.update(summary_stats)

def log_histogram(rmsd_results):
    """Log a 30-bin histogram of the RMSD values to W&B as an HTML panel.

    Args:
        rmsd_results: Sequence of RMSD values.
    """
    fig = go.Figure(data=[go.Histogram(x=rmsd_results, nbinsx=30)])
    # Pass the HTML text straight to wandb.Html. The earlier temp-file round trip
    # created a delete=False file that was never removed.
    wandb.log({"Histogram of rmsd values": wandb.Html(fig.to_html())})

def log_scatter_plot(x, y, ids, x_title, y_title, plot_title, log_name):
    """Log a scatter plot to W&B as an HTML panel.

    Markers are coloured by their ``y`` value (Viridis scale, colour bar titled
    'RMSD') and carry the matching entry of ``ids`` as their text.

    Args:
        x: Values for the x axis.
        y: Values for the y axis; also used as marker colours.
        ids: Per-point text labels.
        x_title: X-axis title.
        y_title: Y-axis title.
        plot_title: Figure title.
        log_name: Key under which the plot is logged.
    """
    fig = go.Figure(data=[go.Scatter(
        x=x,
        y=y,
        mode='markers',
        marker=dict(size=10, color=y, colorscale='Viridis', colorbar=dict(title='RMSD')),
        text=ids
    )])

    fig.update_layout(
        title=plot_title,
        xaxis_title=x_title,
        yaxis_title=y_title
    )

    # Pass the HTML text straight to wandb.Html. The earlier temp-file round trip
    # created a delete=False file that was never removed.
    wandb.log({log_name: wandb.Html(fig.to_html())})

def log_umap_plot(embeddings, labels, plot_title, log_name):
    """Log a 2-D scatter of embeddings to W&B, with one trace per distinct label.

    Args:
        embeddings: Array indexed as ``embeddings[rows, 0]`` / ``embeddings[rows, 1]``
            for the two plotted components.
        labels: One hashable label per embedding row; rows sharing a label form one
            trace, named after the label.
        plot_title: Figure title.
        log_name: Key under which the plot is logged.
    """
    # Group row indices by label in one pass. Traces follow first-seen label order,
    # so the legend order is deterministic.
    rows_by_label = {}
    for row_index, label in enumerate(labels):
        rows_by_label.setdefault(label, []).append(row_index)

    traces = [
        go.Scatter(
            x=embeddings[row_indices, 0],
            y=embeddings[row_indices, 1],
            mode='markers',
            name=f'{label}',
            marker=dict(size=6),
            text=[f'{labels[i]}' for i in row_indices],
            hoverinfo='text'
        )
        for label, row_indices in rows_by_label.items()
    ]

    fig = go.Figure(data=traces)

    fig.update_layout(
        title=plot_title,
        xaxis_title='UMAP Component 1',
        yaxis_title='UMAP Component 2',
        legend_title_text='Labels',
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01,
            font=dict(size=8),
            itemsizing='constant',
            traceorder='grouped',
            itemwidth=30
        )
    )

    # Pass the HTML text straight to wandb.Html. The earlier temp-file round trip
    # created a delete=False file that was never removed.
    wandb.log({log_name: wandb.Html(fig.to_html())})

def wand_logs(df, sequence_embeddings, structure_embeddings, labels):
    """Log the RMSD table, histogram, scatter plots, UMAP plots and summary to W&B.

    Args:
        df: DataFrame with the columns 'id', 'rmsd', 'seq_len' and 'strands'.
        sequence_embeddings: Embedding matrix reduced to 2-D with UMAP.
        structure_embeddings: Embedding matrix reduced to 2-D with UMAP.
        labels: One protein id per embedding row; every id must occur in ``df['id']``
            (a missing id raises KeyError).

    NOTE(review): both scatter plots are logged under "RMSD Scatter plot" and the
    UMAP plots under just two keys, so each key receives several figures — confirm
    this is intended.
    """
    rmsd_results = df['rmsd'].to_list()
    ids = df['id'].to_list()
    seq_len = df['seq_len'].to_list()
    strands = df['strands'].to_list()

    # Per-id lookups built once, instead of filtering the DataFrame three times
    # for every embedding row. setdefault keeps the first row of a duplicated id,
    # matching the earlier `.values[0]` behaviour.
    seq_len_by_id = {}
    strands_by_id = {}
    rmsd_by_id = {}
    for protein_id, rmsd_value, length, strand_count in zip(ids, rmsd_results, seq_len, strands):
        seq_len_by_id.setdefault(protein_id, length)
        strands_by_id.setdefault(protein_id, strand_count)
        rmsd_by_id.setdefault(protein_id, rmsd_value)

    seq_len_per_embedding = [seq_len_by_id[label] for label in labels]
    strand_per_embedding = [strands_by_id[label] for label in labels]
    rmsd_per_embedding = [rmsd_by_id[label] for label in labels]

    protein_metrics = list(zip(ids, rmsd_results, seq_len, strands))
    wandb.log({
        "rmsd_results": wandb.Table(
            data=protein_metrics,
            columns=["ID", "RMSD", "Sequence Length", "Strands"]
        )
    })

    log_histogram(rmsd_results)

    log_scatter_plot(seq_len, rmsd_results, ids, "Sequence Length", "RMSD Value", "RMSD Values by sequence length", "RMSD Scatter plot")
    log_scatter_plot(strands, rmsd_results, ids, "Number of Strands", "RMSD Value", "RMSD Values by number of strands", "RMSD Scatter plot")

    # Each embedding matrix gets its own 2-D UMAP projection.
    sequence_reducer = umap.UMAP(n_components=2)
    sequence_emb = sequence_reducer.fit_transform(sequence_embeddings)

    structure_reducer = umap.UMAP(n_components=2)
    structure_emb = structure_reducer.fit_transform(structure_embeddings)

    log_umap_plot(sequence_emb, labels,  "Protein Sequence Embeddings Colored By Id", "Embeddings Sequence")
    log_umap_plot(structure_emb, labels, "Protein Structure Embeddings Colored By Id", "Embeddings Structure")

    log_umap_plot(sequence_emb, seq_len_per_embedding,  "Protein Sequence Embedding Colored By Sequence Len", "Embeddings Sequence")
    log_umap_plot(sequence_emb, strand_per_embedding, "Protein Sequence Embeddings Colored By Strands", "Embeddings Sequence")

    log_umap_plot(structure_emb, seq_len_per_embedding, "Protein Structure Embeddings Colored By Sequence Len", "Embeddings Structure")
    log_umap_plot(structure_emb, strand_per_embedding, "Protein Structure Embeddings Colored By Strands", "Embeddings Structure")

    log_umap_plot(sequence_emb, rmsd_per_embedding, "Protein Sequence Embeddings Colored By RMSD", "Embeddings Sequence")
    log_umap_plot(structure_emb, rmsd_per_embedding, "Protein Structure Embeddings Colored By RMSD", "Embeddings Structure")

    log_summary_statistics(rmsd_results)


In [None]:
# Full experiment: for every protein, mask the tail of its sequence, let the model
# complete the sequence and predict a structure, align the prediction to the
# reference on the unmasked part and measure backbone RMSD on the masked part.
mask_percantage = 0.1
cache_path = 'protein_chains.pkl'
protein_chains = load_protein_chains(cache_path)
ids = df['id'].to_list()

# zip() below would silently truncate on a length mismatch (e.g. a cache built
# from a different table), and the run would only fail at `df['rmsd'] = ...` after
# all the compute. Check before starting the W&B run.
if len(protein_chains) != len(ids):
    raise ValueError(
        f"{cache_path} holds {len(protein_chains)} chains but the table has {len(ids)} rows; "
        "delete the cache file so it is rebuilt for the current table."
    )

wandb.init(project="DFold", config={"mask_percantage": mask_percantage})
rmsd_results = []
all_sequence_embeddings = []
all_structure_embeddings = []
labels = []
number_of_runs = 1
for protein_chain, protein_id in zip(protein_chains, ids):
    print("id", protein_id)

    # Log the loaded protein chain
    view_protein_chain(protein_chain, protein_id)

    masked_protein, mask_pos = create_masked_protein_chain(protein_chain, mask_percent=mask_percantage)

    # Log the protein with the unmasked part highlighted
    pdb_str = protein_chain.to_pdb_string()
    view_masked_protein_chain(mask_pos, pdb_str, protein_id)

    # get generation configs
    sequence_generation_config, structure_prediction_config = getGenerationConfigs(masked_protein)

    # add to wandb config; the step counts depend on the protein length, so the
    # keys are rewritten on every iteration and value changes must be allowed
    # explicitly (the config ends up holding the last protein's values).
    wandb.config.update({
        "sequence_num_steps": sequence_generation_config.num_steps,
        "sequence_temperature": sequence_generation_config.temperature,
        "sequence_top_p": sequence_generation_config.top_p,
        "sequence_schedule": sequence_generation_config.schedule,
        "structure_num_steps": structure_prediction_config.num_steps,
        "structure_temperature": structure_prediction_config.temperature,
        "structure_top_p": structure_prediction_config.top_p,
        "structure_schedule": structure_prediction_config.schedule
    }, allow_val_change=True)

    # Use ESM3 to predict protein structure of the masked protein
    structure_prediction, sequence_embeddings, structure_embeddings = predict(masked_protein, sequence_generation_config, structure_prediction_config)

    labels.extend([protein_id]*sequence_embeddings.shape[0]) # sequence and structure are same len so it doesnt matter which embedding is used
    # Convert the structure prediction to a ProteinChain object
    structure_prediction_chain = structure_prediction.to_protein_chain()

    # Align the generated structure with the original structure using the non-masked sequence
    inds = np.arange(0, mask_pos)
    aligned_chain = structure_prediction_chain.align(
        protein_chain, mobile_inds=inds, target_inds=inds)

    # View aligned structures
    pdb1 = aligned_chain.to_pdb_string()
    pdb2 = protein_chain.to_pdb_string()
    view_aligned_structures(pdb1, pdb2, inds, protein_id)

    # Calculate RMSD on the masked part
    masked_inds = np.arange(mask_pos, len(protein_chain.sequence))

    view_side_by_side_structures(pdb1,pdb2, inds, masked_inds, protein_id)
    crmsd_masked = aligned_chain.rmsd(protein_chain, mobile_inds=masked_inds, target_inds=masked_inds, only_compute_backbone_rmsd=True)
    print("RMSD: ", crmsd_masked)

    rmsd_results.append(crmsd_masked)

    all_sequence_embeddings.append(sequence_embeddings)
    all_structure_embeddings.append(structure_embeddings)

    del aligned_chain, structure_prediction_chain, protein_chain

df['rmsd'] = rmsd_results

all_sequence_embeddings = torch.cat(all_sequence_embeddings).detach().cpu().numpy()
all_structure_embeddings = torch.cat(all_structure_embeddings).detach().cpu().numpy()
wand_logs(df, all_sequence_embeddings, all_structure_embeddings, labels)
wandb.finish()

df.to_csv('OMBB_data_crmsd.csv', index=False)




Loaded list
id e1af6A1


100%|██████████| 10/10 [00:00<00:00, 12.88it/s]


Sequence Prompt:
	 VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGY___________________________________________
Generated sequence:
	 VDFHGYARSGIGWTGSGGEQQCFQTTGAQSKYRLGNECETYAELKLGQEVWKEGDKSFYFDTNVAYSVAQQNDWEATDPAFREANVQGKNLIEWLPGSTIWAGKRFYQRHDVHMIDFYYWDISGPGAGLENIDVGFGKLSLAATRSSEAGGSSSFASNNIYDYTNETANDVFDVRLAQMEINPGGTLELGVDYGRANLRDNYRLVDGASKDGWLFTAEHTQSVLKGFNKFVVQYATDSMTSQGKGLSQGSGVAFDNEKFAYNINNNGHMLRILDHGAISMGDNWDMMYVGMYQDINWDNDNGTKWWTVGIRPMYKWTPIMSTVMEIGYDNVESQRTGDKNNQYKITLAQQWQAGDSIWSRPAIRVFATYAKWDEKWGYNAGGGPSNGGSYGATAGNGGNGSGQYVDKNEAWTFGVQIEAWW


100%|██████████| 42/42 [00:02<00:00, 17.04it/s]


RMSD:  5.167429586678737
id e1kmoA2


100%|██████████| 13/13 [00:00<00:00, 16.64it/s]


Sequence Prompt:
	 IPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLYSGTRGSDWREHSATRIDDLMLKSKYAPDEVHTFNSLLQYYDGEADMPGGLSRADYDADRWQSTRPYDRFWGRRKLASLGYQFQPDSQHKFNIQGFYTQTLRSGYLEQGKRITLSPRNYWVRGIEPRYSQIFMIGPSAHEVGVGYRYLNESTHEMRYYTATSSGQLPSGSSPYDRDTRSGTEAHAWYLDDKIDIGNWTITPGMRFEHIESYQNNAITGTHEEVSYNAPLPALNVLYHLTDSWNLYANTEGSFGTVQYSQIGKAVQSGNVEPEKARTWELGTRYDDGALTAEMGLFLINFNNQYDSNQTNDTVTARGKTRHTGLETQARYDLGTLTPTLDNVSIYASYAYVNAEIREKGDTYGNLVPFSPKHKGTLGVDYKPGNWTFNLNSDFQSSQFADNANTVKESADGSTGRIPGFMLWGARV_____________________________________________________
Generated sequence:
	 IPQDFGIEAGVEGQLSPTSSQNNPKETHNLMVGGTADNGFGTALLYSGTRGSDWREHSATRIDDLMLKSKYAPDEVHTFNSLLQYYDGEADMPGGLSRADYDADRWQSTRPYDRFWGRRKLASLGYQFQPDSQHKFNIQGFYTQTLRSGYLEQGKRITLSPRNYWVRGIEPRYSQIFMIGPSAHEVGVGYRYLNESTHEMRYYTATSSGQLPSGSSPYDRDTRSGTEAHAWYLDDKIDIGNWTITPGMRFEHIESYQNNAITGTHEEVSYNAPLPALNVLYHLTDSWNLYANTEGSFGTVQYSQIGKAVQSGNVEPEKARTWELGTRYDDGALTAEMGLFLINFNNQYDSNQTNDTVTARGKTRHTGLETQARYDLGTLTPTLDNVSIYASYAYVNAEIREKGDTYGNLVPFSPKHKGTLGVDYKPGNWTFNLNS

100%|██████████| 52/52 [00:03<00:00, 16.16it/s]


RMSD:  3.7544779620132727
id e1p4tA1


100%|██████████| 4/4 [00:00<00:00, 15.02it/s]


Sequence Prompt:
	 EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTV________________
Generated sequence:
	 EGASGFYVQADAAHAKASSSLGSAKGFSPRISAGYRINDLRFAVDYTRYKNYKAPSTDFKLYSIGASAIYDFDTQSPVKPYLGARLSLNRASVDLGGSDSFSQTSIGLGVLTGVSYAVTPNVDLDAGYRYNYIGKVNTVSVKSHNYSLGIRVRLF


100%|██████████| 15/15 [00:00<00:00, 16.10it/s]


RMSD:  0.9393385815575441
id e1prnA1


100%|██████████| 7/7 [00:00<00:00, 15.45it/s]


Sequence Prompt:
	 EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQ_____________________________
Generated sequence:
	 EISLNGYGRFGLQYVEDRGVGLEDTIISSRLRINIVGTTETDQGVTFGAKLRMQWDDGDAFAGTAGNAAQFWTSYNGVTVSVGNVDTAFDSVALTYDSEMGYEASSFGDAQSSFFAYNSKYDASGALDNYNGIAVTYSISGVNLYLSYVDPDQTVDSSLVTEEFGIAADWSNDMISLAAAYTTDAGGIVDNDIAFVGAAYKFNDAGTVGLNWYDNGLSTAGDQVTLYGNYAFGATTVRAYVSDIDRAGADTAYGIGADYQLSKAALLYAEVGEDFDGNTVATAGMRFNF


100%|██████████| 28/28 [00:01<00:00, 18.02it/s]


RMSD:  0.5175686554503592
id e1qd5A1


100%|██████████| 6/6 [00:00<00:00, 15.98it/s]


Sequence Prompt:
	 AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYS__________________________
Generated sequence:
	 AVRGSIIANMLQEHDNPFTLYPYDTNYLIYTQTSDLNKEAIASYDWAENARKDEVKFQLSLAFPLWRGILGPNSVLGASYTQKSWWQLSNSEESSPFRETNYEPQLFLGFATDYRFAGWTLRDVEMGYNHDSNGRSDPTSRSWNRLYTRLMAENGNWLVEVKPWYVVGNTDDNPDITKYMGYYQLKIGYHLGDAVLSAKGQYNWNTGYGGAELGLSYPITKHVRLYTQVYSGYGESLIDYNYRQTRFGIGFAATDWL


100%|██████████| 25/25 [00:01<00:00, 18.02it/s]


RMSD:  1.5955694745301443
id e1qj8A1


100%|██████████| 3/3 [00:00<00:00, 18.81it/s]


Sequence Prompt:
	 ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIR_______________
Generated sequence:
	 ATSTVTGGYAQSDAQGQMNKMGGFNLKYRYEEDNSPLGVIGSFTYTEKSRTASSGDYNKNQYYGITAGPAYRINDWASIYGVVGVGYGKFQTTEYPTYKNDTSDYGFSYGAGLQFNPMENVALDFSYEQSRIRGTAIDTFRATVGYKF


100%|██████████| 14/14 [00:00<00:00, 18.59it/s]


RMSD:  0.3094053123167537
id e1qjpA1


100%|██████████| 3/3 [00:00<00:00, 18.18it/s]


Sequence Prompt:
	 APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQW______________
Generated sequence:
	 APKDNTWYTGAKLGWSQHENKLGAGAFGGYQVNPYVGFEMGYDWLGRMPYAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTYSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWYKVKDDGVNNSDGT


100%|██████████| 13/13 [00:00<00:00, 16.18it/s]


RMSD:  3.8278689171710587
id e1t16A1


100%|██████████| 10/10 [00:00<00:00, 18.58it/s]


Sequence Prompt:
	 AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQNRSISIPDQDRFWLSAGTTYAFNKDASVD___________________________________________
Generated sequence:
	 AGFQLNEFSSSGLGRAYSGEGAIADDAGNVSRNPALITMFDRPTFSAGAVYIDPDVNISGTSPSGRSLKADNIAPTAWVPNMHFVAPINDQFGWGASITSNYGLATEFNDTYAGGSVGGTTDLETMNLNLSGAYRLNNAWSFGLGFNAVYARAKIERFAGDLGQLVAGQIMQSPAGQTQQGQALAATANGIDSNTKIAHLNGNQWGFGWNAGILYELDKNNRYALTYRSEVKIDFKGNYSSDLNRAFNNYGLPIPTATGGATQSGYLTLNLPEMWEVSGYNRVDPQWAIHYSLAYTSWSQFQQLKATSTSGDTLFQKHEGFKDAYRIALGTTYYYDDNWTFRTGIAFDDSPVPAQNRSISIPDQDRFWLSAGTTYAFNKDASVDLAYSHLFVGDAPLDQGGDGNGGHVKGTAKSSVDLFGVQYNYQF


100%|██████████| 42/42 [00:02<00:00, 18.89it/s]


RMSD:  5.628108641637499
id e1tlyA1


100%|██████████| 6/6 [00:00<00:00, 17.69it/s]


Sequence Prompt:
	 LSDWWHQSVNVVGSYHTRFGPQIRNDTYLEYEAFAKKDWFDFYGYADAPVPLFMEIEPRFSIDKLTNTDLSFGPFKEWYFANNYIYDMGRNKDGRQSTWYMGLGTDIDTGLPMSLSMNVYAKYQWQNYGAANENEWDGYRFKIKYFVPITDLWGGQLSYIGFTNFDWGSDLGDDSGNAINGIKTRTNNSIASSHILALNYDHWHYSVVARYWHDGGQWNDDAELN__________________________
Generated sequence:
	 LSDWWHQSVNVVGSYHTRFGPQIRNDTYLEYEAFAKKDWFDFYGYADAPVPLFMEIEPRFSIDKLTNTDLSFGPFKEWYFANNYIYDMGRNKDGRQSTWYMGLGTDIDTGLPMSLSMNVYAKYQWQNYGAANENEWDGYRFKIKYFVPITDLWGGQLSYIGFTNFDWGSDLGDDSGNAINGIKTRTNNSIASSHILALNYDHWHYSVVARYWHDGGQWNDDAELNNGLNEHYVKLTSGFDFKDYGLRLLLR


100%|██████████| 25/25 [00:01<00:00, 18.62it/s]


RMSD:  3.905626995960378


VBox(children=(Label(value='56.212 MB of 56.212 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
max_rmsd,5.62811
mean_rmsd,2.84949
median_rmsd,3.75448
min_rmsd,0.30941
std_rmsd,1.91764
