## Modelling multiple protein structures together


We use a combination of homology modelling and AlphaFold (AI-based) models available through SWISS-MODEL, the automated protein structure homology-modelling server accessible via the Expasy web server.

The code below is adapted from the SWISS-MODEL modelling API documentation:

https://swissmodel.expasy.org/docs/help#modelling_api




Steps:

1. Obtain an API token: download it from your SWISS-MODEL account and store it in the file `~/.config/swissmodel/apikey.txt`.
2. Load all provided FASTA sequences.


In [13]:
from Bio import SeqIO

# Map every FASTA record identifier to its sequence as a plain string.
# The dictionary exists before the file is opened and is filled record by
# record while the file is being parsed.
fasta_dict = {}
with open("pseudomonas_sequences.fna", "r") as handle:
    fasta_dict.update(
        (entry.id, str(entry.seq)) for entry in SeqIO.parse(handle, "fasta")
    )

3. Start Automodel projects for each sequence

In [None]:
import os

# Read the SWISS-MODEL API token from the per-user configuration file.
apikey_path = os.path.expanduser("~/.config/swissmodel/apikey.txt")
with open(apikey_path, 'r', encoding="utf-8") as f:
    api_key = f.read().strip()

# Fail early on an empty token file instead of sending an empty
# "Authorization" header with every later request.
if not api_key:
    raise ValueError(f"API key file is empty: {apikey_path}")

# Do not echo the token: cell output is stored inside the notebook and would
# leak the secret to anyone the notebook is shared with.
print("API Key imported successfully.")

In [None]:
import requests


# Submit one automodel project per FASTA record and remember the server's
# response for every accepted submission, keyed by record id.
#
# The dictionary is created up front so that it always exists after this cell
# has run (even when every submission fails) and never carries stale entries
# over from a previous run of the cell.
responses_by_record = {}

for record_id, sequence in fasta_dict.items():
    print(f"Submitting sequence for {record_id} ...")
    response = requests.post(
        "https://swissmodel.expasy.org/automodel",
        headers={"Authorization": f"Token {api_key}"},
        json={
            "target_sequences": [sequence],
            "project_title": f"Automodelling project for {record_id}",
        },
        # Without a timeout a stalled connection would block the cell forever.
        timeout=60,
    )
    # This cell treats HTTP 201 as a successful submission.
    if response.status_code == 201:
        print(f"Submission for {record_id} successful.")
        responses_by_record[record_id] = response
    else:
        print(f"Error submitting {record_id}: {response.status_code} - {response.text}")



4. Fetch the results

In [None]:
# Poll the server for every submitted project until it reaches a final state,
# then keep the final summary response so that later cells can read the
# project status and its list of models.
import time

POLL_INTERVAL_SECONDS = 10
FINAL_STATES = ("COMPLETED", "FAILED")

for record_id in fasta_dict:
    # Records whose submission was rejected have no stored response.
    submission = responses_by_record.get(record_id)
    if submission is None:
        print(f"No successful submission recorded for {record_id}; skipping.")
        continue

    # Obtain the project_id from the stored response
    project_id = submission.json()["project_id"]
    print(f"Waiting for results of {record_id} (project {project_id}) ...")

    # And loop until the project completes
    while True:
        # We wait for some time
        time.sleep(POLL_INTERVAL_SECONDS)

        # Update the status from the server
        response = requests.get(
            f"https://swissmodel.expasy.org/project/{ project_id }/models/summary/",
            headers={"Authorization": f"Token {api_key}"},
            timeout=60,
        )
        # Surface HTTP errors (bad token, server error) explicitly instead of
        # failing later on a missing "status" key or an undecodable body.
        response.raise_for_status()

        # Update the status
        status = response.json()["status"]

        print('Job status is now', status)

        if status in FINAL_STATES:
            break

    # Replace the submission response with the final summary: it is the
    # response that is read when the model coordinates are downloaded.
    responses_by_record[record_id] = response

Job status is now COMPLETED


5. Check if the job is COMPLETED and fetch the model coordinates

In [None]:
import os
import gzip
import shutil
import tqdm.notebook as tqdm
# Create a directory to store the downloaded models
out_dir = "downloaded_models"
os.makedirs(out_dir, exist_ok=True)

# For every record with a COMPLETED project, download each model in PDB
# format. Both the original .pdb.gz archive and an uncompressed .pdb file are
# written; the .pdb file is prefixed with REMARK lines that carry the record
# id and its FASTA sequence.
for record_id, sequence in fasta_dict.items():
    # Records whose submission failed have no stored response.
    response = responses_by_record.get(record_id)
    if response is None:
        print(f"No response stored for {record_id}; skipping.")
        continue

    response_object = response.json()
    if response_object.get('status') != 'COMPLETED':
        print(f"Skipping {record_id}: project status is {response_object.get('status')}")
        continue

    # REMARK lines containing the record id and its fasta sequence
    remarks = f"REMARK Record ID: {record_id}\nREMARK Sequence: {sequence}\n"

    for model in response_object.get('models', []):
        print(model['modelcif_url'])    # ModelCif Format
        print(model['coordinates_url']) # PDB Format

        # For each model, download the coordinates file in PDB format
        url = model["coordinates_url"]
        download_response = requests.get(url, timeout=120)
        if download_response.status_code != 200:
            print("Failed to download:", url)
            continue

        # Save using model_id in filename to distinguish files
        filename = os.path.join(out_dir, f"{record_id}_{model['model_id']}_coordinates.pdb.gz")
        with open(filename, "wb") as f:
            f.write(download_response.content)

        # Remove the .gz extension to get the target pdb filename
        unp_file = filename[:-3]

        # Decompress in memory and write the annotated .pdb file in a single
        # pass instead of decompressing to disk, re-reading and rewriting it.
        pdb_bytes = gzip.decompress(download_response.content)
        with open(unp_file, "wb") as pdb_out:
            pdb_out.write(remarks.encode("utf-8"))
            pdb_out.write(pdb_bytes)
        print("Downloaded:", filename)

https://swissmodel.expasy.org/project/4e76ca/models/01.cif.gz
https://swissmodel.expasy.org/project/4e76ca/models/01.pdb.gz
Downloaded: downloaded_models/01_coordinates.pdb.gz
