# Goal: Make a t-SNE figure from the ESM-2 embedding representations of the proteins in which a fold was identified.
### 3 datasets: phagedepo_out, CAZY hits, PL16
***
## I. Make multifasta 
## II. esm2 predictions
## III. Visualization t-SNE
***

In [11]:
import os
from Bio.PDB import PDBParser
from Bio.PDB.Polypeptide import PPBuilder
from Bio import SeqIO
from tqdm import tqdm 

def pdb_to_sequence(file_path):
    """Return the sequence of the first polypeptide found in a PDB file.

    The file is parsed with ``PDBParser`` and polypeptides are built with
    ``PPBuilder``. Only the first polypeptide is used; any further ones are
    ignored. ``None`` is returned when no polypeptide can be built.
    """
    structure = PDBParser().get_structure('protein', file_path)
    polypeptides = PPBuilder().build_peptides(structure)
    # Keep the first polypeptide only (or None when there is none).
    first = next(iter(polypeptides), None)
    return None if first is None else first.get_sequence()
        


> The PhageOut DB

In [39]:
import pandas as pd
import os 

# Collect, for every fold, the queries that hit a reference protein of that
# fold in the seekfold result files of the PhageDepo_out dataset.
path_project = "/media/concha-eloko/Linux/depolymerase_building"
dir_out = f"{path_project}/seekfold_phagedepo"
path_pdb = f"{path_project}/phagedepo_out"
path_info = "/media/concha-eloko/Linux/depolymerase_building/depolymerase_fold.csv"

# Reference table; only its "ID" and "X_Group_Name" columns are used here.
info_df = pd.read_csv(path_info, sep="\t", header=0)

# fold name -> list of query names, in order of first appearance.
dico_folds = {
    "jelly-roll": [],
    "alpha/alpha toroid": [],
    "right-handed beta-helix": [],
    "TIM beta/alpha-barrel": [],
    "6-bladed beta-propeller": [],
    "Flavodoxin-like": [],
    "Alpha/Beta hydrolase fold": [],
    "Other": [],
}

# Result files are recognised by their name ending in "out".
outputs = [f"{dir_out}/{file}" for file in os.listdir(dir_out) if file.endswith("out")]
header_seekfold = ["query", "target", "pident", "alnlen", "gapopen",
                   "qstart", "qend", "tstart", "tend", "bits", "prob"]
depo_results = {}

# Minimum "prob" value for a hit to be kept.
# NOTE(review): one threshold is applied to every fold. If
# "right-handed beta-helix" was meant to use a different cut-off, it has to
# be added explicitly; a second branch with the same 0.2 limit can never run.
min_prob = 0.2

# Build the ID -> fold lookup once instead of filtering info_df for every
# row. When an ID is listed several times its first entry is used.
fold_by_target = (
    info_df.drop_duplicates("ID").set_index("ID")["X_Group_Name"].to_dict()
)

for results in outputs:
    results_df = pd.read_csv(results, sep="\t", names=header_seekfold)
    confident_hits = results_df[results_df["prob"] >= min_prob]
    for query, target in zip(confident_hits["query"], confident_hits["target"]):
        # A KeyError here means the target has no entry in info_df, or its
        # fold is not one of the keys of dico_folds.
        fold = fold_by_target[target]
        if query not in dico_folds[fold]:
            dico_folds[fold].append(query)


In [41]:
# Final collection filled by the following cells, laid out as
# {fold name: {PDB file name: {"Dataset": <dataset label>, "Seq": <sequence>}}}.
dico_data_final = {}

In [42]:
import warnings
# Silence all warnings from here on (the filter is global).
warnings.filterwarnings("ignore")

# Folds that are left out of the final collection.
excluded_folds = ["jelly-roll", "Other"]

for fold in tqdm(dico_folds):
    members = dico_folds[fold]
    if fold in excluded_folds or len(members) == 0:
        continue
    # 6-bladed propellers are stored under the generic propeller label.
    fold_name = "n-bladed beta-propeller" if fold == "6-bladed beta-propeller" else fold
    for protein in members:
        entry = {
            protein: {
                "Dataset": "PhageDepo_out",
                "Seq": pdb_to_sequence(f"{path_pdb}/{protein}"),
            }
        }
        # Create the per-fold dictionary on first use, then add the protein.
        dico_data_final.setdefault(fold_name, {}).update(entry)
            


100%|██████████████████████████████████████████████████████████| 8/8 [01:50<00:00, 13.86s/it]


In [43]:
dico_data_final

{'alpha/alpha toroid': {'2170.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MASKKIRILTPEQAHALYALPARLTAAGVTNTIQGMLDSIALGNIMSAGANVFN...GAR')}},
 'right-handed beta-helix': {'5416.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MTTKVNNRMIDGAAVNVLDFGADPKGVSDSTTAFQSAIDSINGGKLIVPEGTYY...ASH')},
  '3777.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MNPQFAQPKGSTSKESNKDSIARKFGCKKSEVVYAKAGQSLSGYKVIYDKLSQR...VIG')},
  '4603.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MAIAIVEELWTRIRSAIDDKIIAAQDAVDRAATSATNAKTSETAAAQSASEAEA...YNW')},
  '6115.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MSSSCGGVMSLNDLQIAKKHQIFEAEVITGKQGGVAGGADIDYATNQVTGQTQK...TLA')},
  '1809.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MATTPTSLPIPSEDPRDLKFNAGKFDEVMTSDAHYYVDRFGVKRWTIAGFQYTA...TLA')},
  '5282.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MGYFQMTRNVEELFGGVITAPHQIPFTYKSNVGGETFLSLPFYPVTGVVTINGG...SSI')},
  '1856.pdb': {'Dataset': 'PhageDepo_out',
   'Seq': Seq('MATTPTSLPIPSEDPRDLKFNAGKFDEVMTS

> CAZY dataset

In [44]:
import pandas as pd
import os 

# Add to dico_data_final the queries whose seekfold hit against a CAZY
# protein falls in one of the fold categories kept for the figure.
path_project = "/media/concha-eloko/Linux/depolymerase_building"
dir_out = f"{path_project}/CAZY_seekfold"

path_info = "/media/concha-eloko/Linux/PhageDEPOdetection/PDB_files/folds_CAZY.tsv"
# Reference table; only its "Protein" and "Folds" columns are used here.
info_df = pd.read_csv(path_info, sep="\t", header=0)

# Result files are recognised by their name ending in "out".
outputs = [f"{dir_out}/{file}" for file in os.listdir(dir_out) if file.endswith("out")]
header_seekfold = ["query", "target", "pident", "alnlen", "gapopen",
                   "qstart", "qend", "tstart", "tend", "bits", "prob"]
depo_results = {}

# Build the Protein -> fold lookup once instead of filtering info_df for
# every row. When a protein is listed several times its first entry is used.
fold_by_protein = (
    info_df.drop_duplicates("Protein").set_index("Protein")["Folds"].to_dict()
)


def _cazy_fold_category(fold):
    """Return the category used in dico_data_final for a CAZY fold label.

    Returns None when the label matches none of the three categories.
    """
    if "propeller" in fold:
        return "n-bladed beta-propeller"
    if "barrel" in fold:
        if "α / α" in fold:
            return "(α / α) n barrel"
        if "β / α" in fold:
            return "(β / α) n barrel"
    return None


for results in tqdm(outputs):
    results_df = pd.read_csv(results, sep="\t", names=header_seekfold)
    # Keep hits with prob >= 0.5 and an alignment of at least 250 positions.
    hits = results_df[(results_df["prob"] >= 0.5) & (results_df["alnlen"] >= 250)]
    for query, target in zip(hits["query"], hits["target"]):
        fold = fold_by_protein.get(target)
        # Skip targets without a usable fold annotation, and jelly rolls.
        if not isinstance(fold, str) or fold == "β-jelly roll":
            continue
        category = _cazy_fold_category(fold)
        if category is None:
            # NOTE(review): this stops reading the current result file at
            # the first hit whose fold is in none of the categories, so the
            # remaining rows of the file are ignored. Kept as in the
            # original; confirm that `continue` was not intended.
            break
        try:
            # path_pdb is defined in an earlier cell.
            seq = pdb_to_sequence(f"{path_pdb}/{query}")
        except Exception as exc:
            # Best effort: report the structure that could not be read
            # instead of dropping it silently, then carry on.
            print(f"Skipping {query}: could not extract sequence ({exc})")
            continue
        dico_data_final.setdefault(category, {})[query] = {"Dataset": "CAZY", "Seq": seq}

100%|██████████████████████████████████████████████████████| 990/990 [00:46<00:00, 21.42it/s]


In [29]:
import pandas as pd
import os 

# Debugging cell: print the fold annotation of every CAZY target that is hit
# with prob >= 0.5, and report the targets that have no annotation.
path_project = "/media/concha-eloko/Linux/depolymerase_building"
dir_out = f"{path_project}/CAZY_seekfold"

path_info = "/media/concha-eloko/Linux/PhageDEPOdetection/PDB_files/folds_CAZY.tsv"
# Reference table; only its "Protein" and "Folds" columns are used here.
info_df = pd.read_csv(path_info, sep="\t", header=0)

# Result files are recognised by their name ending in "out".
outputs = [f"{dir_out}/{file}" for file in os.listdir(dir_out) if file.endswith("out")]
header_seekfold = ["query", "target", "pident", "alnlen", "gapopen",
                   "qstart", "qend", "tstart", "tend", "bits", "prob"]
depo_results = {}
for results in tqdm(outputs):
    results_df = pd.read_csv(results, sep="\t", names=header_seekfold)
    for target in results_df.loc[results_df["prob"] >= 0.5, "target"]:
        folds = info_df.loc[info_df["Protein"] == target, "Folds"].values
        if len(folds) == 0:
            # Name the target itself, so that a fold left over from an
            # earlier row is never printed for an unannotated target.
            print(f"{target}: no fold annotation")
        else:
            print(folds[0])

  1%|▍                                                       | 8/990 [00:00<00:12, 77.60it/s]

['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
['parallel β-helix']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]


  3%|█▌                                                     | 28/990 [00:00<00:11, 83.14it/s]

[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
['5-fold β-propeller']
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
['parallel β-helix']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
['β-propeller']
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
['β-jelly roll' 'β-jelly roll']
['( α / α ) 6 barrel']
['( α / α ) 7 barrel']
['5-fold β-propeller']
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 7 bar

  6%|███▏                                                   | 57/990 [00:00<00:10, 87.00it/s]

['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]


  8%|████▍                                                  | 79/990 [00:00<00:09, 97.31it/s]

['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 11%|█████▉                                                | 109/990 [00:01<00:09, 92.97it/s]

['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['β-jelly roll' 'β-jelly roll']
['β-jelly roll']
['5-fold β-propeller']


 12%|██████▌                                               | 121/990 [00:01<00:08, 98.22it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['parallel β-helix']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['5-fold β-propeller']
['( α / α ) 6 barrel']
['5-fold β-propeller']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['parallel β-helix']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']


 14%|███████▋                                              | 140/990 [00:01<00:10, 83.62it/s]

['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 (inferred)']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['β-jelly roll']
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
['5-fold β-propeller']
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
['β-jelly roll']
[]
['β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
[]


 16%|████████▌                                             | 158/990 [00:01<00:11, 73.70it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
['β-jelly roll']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]


 18%|█████████▋                                            | 177/990 [00:02<00:09, 81.82it/s]

[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]


 20%|██████████▉                                           | 201/990 [00:02<00:08, 96.75it/s]

[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
['β-jelly roll']
['β-jelly roll' 'β-jelly roll']
[]
['β-jelly roll']
['( β / α ) 8 barrel']
[]
[]
['β-jelly roll']
['5-fold β-propeller']
[]
['5-fold β-propeller']
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['β-propeller' 'β-propeller']
[]
['β-jelly roll' 'β-jelly roll']
['( β / α ) 8 barrel']
[]


 23%|████████████▏                                         | 224/990 [00:02<00:08, 91.69it/s]

['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
['( α / α ) 7 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['( β / α ) 8 barrel']
['5-fold β-propeller']
['5-fold β-propeller']
[]
[]
[]
['parallel β-helix']
[]
[]


 25%|█████████████▎                                        | 244/990 [00:02<00:08, 87.21it/s]

[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['β-jelly roll']
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['5-fold β-propeller']
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]


 27%|██████████████▍                                       | 264/990 [00:03<00:08, 88.29it/s]

[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['5-fold β-propeller']
[]
[]
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
['( β / α ) 8 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]


 30%|████████████████▏                                     | 296/990 [00:03<00:07, 95.89it/s]

[]
[]
['β-jelly roll' 'β-jelly roll']
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['parallel β-helix']
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['5-fold β-propeller']
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['β-propeller' 'β-propeller']


 31%|████████████████▋                                     | 306/990 [00:03<00:07, 91.89it/s]

[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['lysozyme fold']
[]
[]
[]
['lysozyme fold']
[]
['( β / α ) 8 barrel']
[]
[]
[]
['parallel β-helix']
['( β / α ) 8 barrel']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['lysozyme fold']
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['parallel β-helix']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['β-propeller' 'β-propeller']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel

 34%|██████████████████▎                                   | 335/990 [00:03<00:07, 89.09it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
['parallel β-helix']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
['parallel β-helix']
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
['( β / α ) 8 barrel']


 36%|███████████████████▎                                  | 355/990 [00:04<00:07, 87.20it/s]

[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]


 38%|████████████████████▎                                | 379/990 [00:04<00:05, 102.22it/s]

[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['parallel β-helix']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 41%|█████████████████████▉                                | 402/990 [00:04<00:06, 91.68it/s]

['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']


 43%|███████████████████████▏                              | 424/990 [00:04<00:05, 94.75it/s]

[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']


 45%|████████████████████████▏                             | 444/990 [00:04<00:06, 90.55it/s]

[]
[]
[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
['parallel β-helix']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
['β-jelly roll' 'β-jelly roll']
['5-fold β-propeller']
[]
[]
[]
[]
['β-jelly roll']
[]
['β-jelly roll']
[]
['β-jelly roll']
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['β-jelly roll

 47%|█████████████████████████▎                            | 463/990 [00:05<00:06, 75.52it/s]

['β-jelly roll' 'β-jelly roll']
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['5-fold β-propeller']
[]
[]
['β-jelly roll']
[]
['5-fold β-propeller']
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
['β-jelly roll']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 7 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['β-jelly roll']
['5-fold β-propeller']
['5-fold β-propeller']
[]
['β-jelly roll']
[]
[]
['β-jelly roll' 'β

 49%|██████████████████████████▏                           | 481/990 [00:05<00:06, 76.88it/s]


[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]

 50%|███████████████████████████▏                          | 499/990 [00:05<00:06, 80.88it/s]


[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['( β / α ) 8 barrel']
[]
['β-jelly roll']
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
['5-fold β-propeller']
['5-fold β-propeller']
['parallel β-helix']
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['parallel β-helix']
['( β / α ) 8 barrel']
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['( β / α ) 8 barrel']


 52%|████████████████████████████▏                         | 517/990 [00:05<00:06, 78.67it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 7 barrel']
[]
[]
[]
[]
[]
['lysozyme fold']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['β-propeller' 'β-propeller']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['parallel β-helix']


 54%|█████████████████████████████                         | 533/990 [00:06<00:06, 71.57it/s]

[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
['parallel β-helix']
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['( β / α ) 8 barrel']
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
['5-fold β-propeller']
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]

 55%|█████████████████████████████▌                        | 541/990 [00:06<00:06, 70.27it/s]


['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['5-fold β-propeller']
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]


 56%|██████████████████████████████▍                       | 557/990 [00:06<00:06, 68.31it/s]

[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
['parallel β-helix']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
[]
['parallel β-helix']


 58%|███████████████████████████████▍                      | 576/990 [00:06<00:05, 78.86it/s]

['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
['parallel β-helix']
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['5-fold β-propeller']
['5-fold β-propeller']
[]
[]
['β-jelly roll']
['5-fold β-propeller']
[]
[]
[]
['β-jelly roll']
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
['β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]


 60%|████████████████████████████████▌                     | 597/990 [00:07<00:04, 83.89it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
[]
[]


 62%|█████████████████████████████████▌                    | 615/990 [00:07<00:05, 69.66it/s]

[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['5-fold β-propeller']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
['( β / α ) 8 barr

 63%|██████████████████████████████████▏                   | 626/990 [00:07<00:04, 75.82it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['5-fold β-propeller']
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
['β-propeller']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 66%|███████████████████████████████████▉                  | 658/990 [00:07<00:03, 87.00it/s]

[]
[]
['parallel β-helix']
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]


 67%|████████████████████████████████████▍                 | 668/990 [00:07<00:03, 84.51it/s]

['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
['β-propeller' 'β-propeller']
[]
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['parallel β-helix']
[]
[]
['5-fold β-propeller']
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
['β-propeller']
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
['parallel β-helix']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 69%|█████████████████████████████████████▍                | 686/990 [00:08<00:03, 78.27it/s]

['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
['5-fold β-propeller']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['β-jelly roll']
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
['parallel β-helix']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['parallel β-helix']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['5-fold β-pro

 72%|██████████████████████████████████████▊               | 712/990 [00:08<00:03, 79.54it/s]

[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 74%|████████████████████████████████████████              | 734/990 [00:08<00:02, 93.79it/s]

[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']


 76%|█████████████████████████████████████████▏            | 754/990 [00:09<00:02, 90.33it/s]

['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]


 77%|█████████████████████████████████████████▋            | 764/990 [00:09<00:02, 78.69it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['5-fold β-propeller']
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
['5-fold β-propeller']
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


 79%|██████████████████████████████████████████▌           | 781/990 [00:09<00:02, 73.81it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['5-fold β-propeller']
['5-fold β-propeller']
[]
['5-fold β-propeller']


 81%|███████████████████████████████████████████▌          | 799/990 [00:09<00:02, 75.40it/s]

[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']


 82%|████████████████████████████████████████████          | 808/990 [00:09<00:02, 71.23it/s]

['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
[]
[]
['5-fold β-propeller']
[]
[]
[]
['β-jelly roll' 'β-jelly roll']
['β-jelly roll']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
['5-fold β-propeller']
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['parallel β-helix']
['( α / α ) 6 toroid + anti-parallel β-sheet']
['5-fold β-propeller']
[]
[]
['( β / α ) 8 barrel']
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']


 84%|█████████████████████████████████████████████         | 827/990 [00:10<00:02, 78.91it/s]

[]
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['( α / α ) 6 barrel']
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
['β-jelly roll']
[]
[]
['β-jelly roll']


 85%|██████████████████████████████████████████████        | 844/990 [00:10<00:02, 72.53it/s]

['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
['lysozyme fold']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['parallel β-helix']
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['parallel β-helix']
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]


 88%|███████████████████████████████████████████████▌      | 873/990 [00:10<00:01, 86.74it/s]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
['lysozyme fold']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
['5-fold β-propeller']
[]
[]


 90%|████████████████████████████████████████████████▊     | 894/990 [00:10<00:01, 95.47it/s]

[]
[]
['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
[]
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['parallel β-helix']
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 (inferred)']
[]

 92%|█████████████████████████████████████████████████▊    | 913/990 [00:11<00:00, 82.30it/s]


[]
['( β / α ) 8 barrel']
['5-fold β-propeller']
[]
[]
[]
[]
['5-fold β-propeller']
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]


 94%|██████████████████████████████████████████████████▉   | 934/990 [00:11<00:00, 88.03it/s]

['( β / α ) 8 barrel']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
['β-propeller']


 96%|███████████████████████████████████████████████████▉  | 952/990 [00:11<00:00, 80.08it/s]

['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]
[]
['β-propeller']
[]
[]
[]
[]
['6-bladed β-propeller']
[]
[]
[]
[]
[]
['5-fold β-propeller']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']


 98%|████████████████████████████████████████████████████▉ | 970/990 [00:11<00:00, 76.59it/s]

['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
['( α / α ) 6 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( α / α ) 7 barrel']
[]
[]
['( α / α ) 7 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
['β-sandwich + β-sheet' 'β-sandwich + β-sheet']
[]
['parallel β-helix']
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
['( α / α ) 6 barrel']
['( α / α ) 6 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
['parallel β-helix']
['( β / α ) 8 barrel']
[]
[]
['( α / α ) 6 barrel']
[]
[]
[]


100%|██████████████████████████████████████████████████████| 990/990 [00:12<00:00, 82.40it/s]

['( β / α ) 8 barrel']
[]
['parallel β-helix']
['5-fold β-propeller']
[]
['( β / α ) 8 barrel']
['( β / α ) 8 barrel']
[]
['( β / α ) 8 barrel']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['5-fold β-propeller']
['β-jelly roll']
[]
[]
['( α / α ) 6 toroid + anti-parallel β-sheet']
[]
['parallel β-helix']
[]
['( β / α ) 8 barrel']





> PL16

In [45]:
# Add the CAZy PL16 proteins to the shared fold -> protein dictionary.
path_PL16 = "/media/concha-eloko/Linux/PhageDEPOdetection/PL16_proteins"

# Every record read from this directory is filed under this single fold label.
fold = "triple β helix"

for file in os.listdir(path_PL16) :
    # The key is the file name up to the first ".multi"; records coming from
    # the same file therefore share one key and the last one read is kept.
    name = file.split(".multi")[0]
    for record in SeqIO.parse(f"{path_PL16}/{file}" , "fasta") :
        entry = {"Dataset" : "CAZY_PL16", "Seq" : record.seq}
        # Create the per-fold sub-dictionary on first use, then (over)write the entry.
        dico_data_final.setdefault(fold, {})[name] = entry

In [46]:
# Export the collected proteins as a TSV table and as a multi-FASTA file.
# Only entries whose sequence length is strictly greater than this threshold
# are written to either file.
min_seq_len = 200

# Flatten the fold -> protein -> info mapping once, so that both output files
# are guaranteed to contain exactly the same rows in the same order.
kept_records = [
    (prot, fold, info["Dataset"], str(info["Seq"]))
    for fold, prots in dico_data_final.items()
    for prot, info in prots.items()
    if len(str(info["Seq"])) > min_seq_len
]

# Fold labels contain non-ASCII characters (e.g. "β"), so the encoding is
# pinned to UTF-8 instead of being left to the locale default.
with open("/media/concha-eloko/Linux/PhageDEPOdetection/PhageDepo.dataset.tsv" , "w", encoding = "utf-8") as outfile :
    outfile.write("Prot_ID\tFold\tDataset\tSeq\n")
    for prot, fold, dataset, sequence in kept_records :
        outfile.write(f"{prot}\t{fold}\t{dataset}\t{sequence}\n")

# FASTA header layout: >{protein id}__{fold}__{dataset}
with open("/media/concha-eloko/Linux/PhageDEPOdetection/PhageDepo.dataset.multi.fasta" , "w", encoding = "utf-8") as outfile :
    for prot, fold, dataset, sequence in kept_records :
        outfile.write(f">{prot}__{fold}__{dataset}\n{sequence}\n")


In [None]:
# Copy the multi-FASTA dataset to the remote host, using ssh as the transport.
rsync --archive --verbose --compress --human-readable --rsh=ssh \
/media/concha-eloko/Linux/PhageDEPOdetection/PhageDepo.dataset.multi.fasta \
conchae@garnatxa.srv.cpd:/home/conchae/PhageDepo_pdb


***
## II. esm2 predictions

In [None]:
#!/bin/bash
# SLURM batch job: run the ESM extract.py script on the PhageDepo multi-FASTA.
# Every directive must start with "#SBATCH"; any other prefix is read as a
# plain comment and silently ignored by sbatch.
#SBATCH --job-name=ESM_2__
#SBATCH --qos=medium
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=50
#SBATCH --mem=200gb
#SBATCH --time=2-00:00:00
#SBATCH --output=ESM_2__%j.log

# Make `conda activate` available in this non-interactive shell, then switch
# to the environment used for the embeddings.
source /storage/apps/ANACONDA/anaconda3/etc/profile.d/conda.sh
conda activate embeddings

# Positional arguments, in order: model name, input FASTA, output location
# (NOTE(review): confirm against the installed esm version's extract.py usage).
python /home/conchae/software/esm/scripts/extract.py \
esm2_t33_650M_UR50D \
/home/conchae/PhageDepo_pdb/PhageDepo.dataset.multi.fasta \
/home/conchae/PhageDepo_pdb/PhageDepo.dataset.multi.fasta.esm2_out \
--repr_layers 0 32 33 \
--include mean per_tok

***
## III. Visualization t-SNE