In [1]:
# Install dependencies
!pip install esm
!pip install biopython
!pip install ipywidgets

In [2]:
# Import libraries
# Standard libraries
import sys
from pathlib import Path

# Third-party libraries
import numpy as np
import pandas as pd
from Bio import SeqIO

# PyTorch libraries
import torch

# ESM3 
from huggingface_hub import login
from esm.models.esm3 import ESM3
from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, GenerationConfig

In [4]:
## Connect to the ESM3 model served by Forge
from getpass import getpass
from esm.sdk import client

# getpass prompts without echoing, so the token is typed at run time instead of
# being written into the notebook source.
token = getpass("Token from Forge console: ")

# `model` is the client handle that later cells call `generate` on.
model = client(model="esm3-medium-2024-08", url="https://forge.evolutionaryscale.ai", token=token)

In [8]:
## Read all sequences from the FASTA file into a DataFrame
fasta_path = '/home/azureuser/cloudfiles/code/Users/jc62/projects/direct_sequence_analysis/data/antiphage_sequences_short.fasta'

# Collect every record first and build the frame once. Growing a DataFrame with
# pd.concat inside the loop copies the whole frame on each iteration (quadratic)
# and concatenates onto an empty frame on the first pass.
fasta_records = [
    (record.id, str(record.seq))
    for record in SeqIO.parse(fasta_path, "fasta")
]

# NOTE: the "description" column holds record.id (not record.description);
# the column name is kept because later cells may refer to it.
fasta_df = pd.DataFrame(fasta_records, columns=["description", "sequence"])

In [10]:
## Generate structure with structure track
def structure_generation(target_sequence, num_steps):
    """Request a structure-track generation from the global ``model``.

    Args:
        target_sequence: Sequence passed to ``ESMProtein(sequence=...)``.
        num_steps: Forwarded unchanged as ``GenerationConfig(num_steps=...)``.

    Returns:
        Whatever ``model.generate`` returns for this request.
    """
    query_protein = ESMProtein(sequence=target_sequence)
    structure_config = GenerationConfig(track="structure", num_steps=num_steps)
    return model.generate(query_protein, structure_config)

In [11]:
## Generate structures with ESM3 structure track
# Writes one PDB file per row of `fasta_df`, named structure_<row position>.pdb.
num_steps = 8
pdb_path = "/home/azureuser/cloudfiles/code/Users/jc62/projects/direct_sequence_analysis/data/pdb_files/"

# Create the output folder up front so a missing directory cannot fail the
# first write after a (potentially slow) remote generation call.
Path(pdb_path).mkdir(parents=True, exist_ok=True)

# Iterate the column directly instead of the chained fasta_df['sequence'][i]
# label lookup; `i` is the row position used in the output file name.
for i, target_sequence in enumerate(fasta_df['sequence']):
    structure = structure_generation(target_sequence, num_steps)
    # NOTE(review): the Forge client appears to return an error object rather
    # than raising when a request fails - confirm against the ESM SDK. Fail
    # with a message naming the sequence instead of an opaque AttributeError.
    if not isinstance(structure, ESMProtein):
        raise RuntimeError(
            f"Structure generation failed for sequence {i}: {structure!r}"
        )
    structure.to_pdb(f"{pdb_path}structure_{i}.pdb")