## Modelling multiple protein structures together


We use a combination of homology and alphafold (AI models) available in SWISSMODEL, the automated protein structure homology-modelling server, accessible via the Expasy web server. 

Directly taken from;

https://swissmodel.expasy.org/docs/help#modelling_api




Steps:

1. Obtain API token: Downloaded from account @ swissmodel and stored in file @ ~/.config/swissmodel/apikey.txt
2. Then, Load all provided fasta sequences


In [1]:
from Bio import SeqIO

# Read the FASTA file and store sequences in a dictionary
fasta_dict = {}
with open("pseudomonas_sequences.fna", "r") as fasta_file:
    for record in SeqIO.parse(fasta_file, "fasta"):
        fasta_dict[record.id] = str(record.seq)

3. Start Automodel projects for each sequence

In [None]:
import os

apikey_path = os.path.expanduser("~/.config/swissmodel/apikey.txt")
with open(apikey_path, 'r') as f:
    api_key = f.read().strip()

print("API Key imported successfully:", api_key)

In [5]:
import requests


for record_id, sequence in fasta_dict.items():
    print(f"Submitting sequence for {record_id} ...")
    response = requests.post(
        "https://swissmodel.expasy.org/automodel",
        headers={ "Authorization": f"Token {api_key}" },
        json={ 
            "target_sequences": [sequence],
            "project_title": f"Automodelling project for {record_id}"
        }
    )
    if response.status_code == 202:
        print(f"Submission for {record_id} successful.")
        if 'responses_by_record' not in globals():
            responses_by_record = {}
        responses_by_record[record_id] = response
    else:
        print(f"Error submitting {record_id}: {response.status_code} - {response.text}")



Submitting sequence for 2012|D0E7M2|D0E7M2_PSEAI ...
Submission for 2012|D0E7M2|D0E7M2_PSEAI successful.
Submitting sequence for 2016|WP_063840509.1:61-251 ...
Submission for 2016|WP_063840509.1:61-251 successful.
Submitting sequence for 1999|Q9ZNQ0|Q9ZNQ0_PSEAI ...
Submission for 1999|Q9ZNQ0|Q9ZNQ0_PSEAI successful.
Submitting sequence for 2007|A6VD77|A6VD77_PSEA7 ...
Submission for 2007|A6VD77|A6VD77_PSEA7 successful.
Submitting sequence for 2014|W1MT92|W1MT92_PSEAI ...
Submission for 2014|W1MT92|W1MT92_PSEAI successful.
Submitting sequence for 2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB ...
Submission for 2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB successful.
Submitting sequence for 2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI ...
Submission for 2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI successful.
Submitting sequence for 1999|Q9X6V3|Q9X6V3_PSEAI ...
Submission for 1999|Q9X6V3|Q9X6V3_PSEAI successful.
Submitting sequence for 2015|A0A0A8RE86|A0A0A8RE86_PSEAI ...
Submission for 2015|A0A0A8RE86|A0A0A8RE86_PSEAI successful.
Sub

4. Fetch the results

In [19]:
r = responses_by_record['2012|D0E7M2|D0E7M2_PSEAI']
project_id = r.json()["project_id"]

r_new = requests.get(
        f"https://swissmodel.expasy.org/project/{ project_id }/models/summary/", 
        headers={ "Authorization": f"Token {api_key}" })
r_new.json()

{'project_id': '53a3c0',
 'status': 'INITIALISED',
 'models': None,
 'date_created': '2025-04-11T05:26:09.111523Z',
 'project_title': 'Automodelling project for 2012|D0E7M2|D0E7M2_PSEAI'}

In [27]:
for record_id, sequence in fasta_dict.items():
    print(f"Submitting sequence for {record_id} ...")
    response = responses_by_record[record_id]
    if response.status_code == 202:
        print(f"Submission for {record_id} successful.")
    else:
        print(f"Error submitting {record_id}: {response.status_code} - {response.text}")

    # Obtain the project_id from the response created above
    project_id = response.json()["project_id"]

    
    # Update the status from the server 
    response = requests.get(
        f"https://swissmodel.expasy.org/project/{ project_id }/models/summary/", 
        headers={ "Authorization": f"Token {api_key}" })

    # Update the status
    status = response.json()["status"]

    print('Job status is now', status)


Submitting sequence for 2012|D0E7M2|D0E7M2_PSEAI ...
Submission for 2012|D0E7M2|D0E7M2_PSEAI successful.
Job status is now INITIALISED
Submitting sequence for 2016|WP_063840509.1:61-251 ...
Submission for 2016|WP_063840509.1:61-251 successful.
Job status is now INITIALISED
Submitting sequence for 1999|Q9ZNQ0|Q9ZNQ0_PSEAI ...
Submission for 1999|Q9ZNQ0|Q9ZNQ0_PSEAI successful.
Job status is now INITIALISED
Submitting sequence for 2007|A6VD77|A6VD77_PSEA7 ...
Submission for 2007|A6VD77|A6VD77_PSEA7 successful.
Job status is now INITIALISED
Submitting sequence for 2014|W1MT92|W1MT92_PSEAI ...
Submission for 2014|W1MT92|W1MT92_PSEAI successful.
Job status is now INITIALISED
Submitting sequence for 2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB ...
Submission for 2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB successful.
Job status is now INITIALISED
Submitting sequence for 2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI ...
Submission for 2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI successful.
Job status is now INITIALISED
Submitting sequence

5. Check if the job is COMPLETED and fetch the model coordinates

In [None]:
import os
import gzip
import shutil
import tqdm.notebook as tqdm
# Create a directory to store the downloaded models
out_dir = "downloaded_models"
os.makedirs(out_dir, exist_ok=True)

for record_id, sequence in fasta_dict.items():
    response = responses_by_record[record_id]
    response_object = response.json()
    if response_object['status']=='COMPLETED':
        for model in response_object['models']:
            print(model['modelcif_url'])    # ModelCif Format
            print(model['coordinates_url']) # PDB Format
            # For each model, download the coordinates file in PDB format
            url = model["coordinates_url"]
            download_response = requests.get(url)
            if download_response.status_code == 200:
                # Save using model_id in filename to distinguish files
                filename = os.path.join(out_dir, f"{record_id}_{model['model_id']}_coordinates.pdb.gz")
                with open(filename, "wb") as f:
                    f.write(download_response.content)
                # Remove the .gz extension to get the target pdb filename
                unp_file = filename[:-3]

                # Decompress the .gz file to a .pdb file
                with gzip.open(filename, 'rb') as f_in, open(unp_file, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

                # Read the decompressed pdb file content
                with open(unp_file, 'r') as pdb_in:
                    pdb_content = pdb_in.read()

                # Prepend REMARK lines containing the record id and its fasta sequence 
                remarks = f"REMARK Record ID: {record_id}\nREMARK Sequence: {fasta_dict[record_id]}\n"
                with open(unp_file, 'w') as pdb_out:
                    pdb_out.write(remarks + pdb_content)
                print("Downloaded:", filename)
            else:
                print("Failed to download:", url)

https://swissmodel.expasy.org/project/4e76ca/models/01.cif.gz
https://swissmodel.expasy.org/project/4e76ca/models/01.pdb.gz
Downloaded: downloaded_models/01_coordinates.pdb.gz


In [None]:
old_projectids = [
    "da6be8",
    "01049c",
    "086f25",
    "084b59",
    "b04f1d",
    "54b0ee",
    "372310",
    "d3fb0c",
    "e73c1d",
    "70a89d",
    "005c96",
    "b70498",
    "ace2d9",
    "2a6ce4"
]

old_sequences = [
    "2018B|A0A2X4EZ45|A0A2X4EZ45_PSEAI",
    "2018A|A0A2R3INH2|A0A2R3INH2_PSEAI",
    "2007|A6V2Q7|A6V2Q7_PSEA7",
    "1996|P48372|GYRA_PSEAE",
    "2018|A0A2R3ILK4|A0A2R3ILK4_PSEAI",
    "2015|A0A0A8RE86|A0A0A8RE86_PSEAI",
    "1999|Q9X6V3|Q9X6V3_PSEAI",
    "2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI",
    "2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB",
    "2014|W1MT92|W1MT92_PSEAI",
    "2007|A6VD77|A6VD77_PSEA7",
    "1999|Q9ZNQ0|Q9ZNQ0_PSEAI",
    "2016|WP_063840509.1:61-251",
    "2012|D0E7M2|D0E7M2_PSEAI"
]
old_sequences.reverse()

['2012|D0E7M2|D0E7M2_PSEAI',
 '2016|WP_063840509.1:61-251',
 '1999|Q9ZNQ0|Q9ZNQ0_PSEAI',
 '2007|A6VD77|A6VD77_PSEA7',
 '2014|W1MT92|W1MT92_PSEAI',
 '2015|A0A0H2ZL55|A0A0H2ZL55_PSEAB',
 '2018|A0A3A1ZXK0|A0A3A1ZXK0_PSEAI',
 '1999|Q9X6V3|Q9X6V3_PSEAI',
 '2015|A0A0A8RE86|A0A0A8RE86_PSEAI',
 '2018|A0A2R3ILK4|A0A2R3ILK4_PSEAI',
 '1996|P48372|GYRA_PSEAE',
 '2007|A6V2Q7|A6V2Q7_PSEA7',
 '2018A|A0A2R3INH2|A0A2R3INH2_PSEAI',
 '2018B|A0A2X4EZ45|A0A2X4EZ45_PSEAI']

In [34]:
import os
import gzip
import shutil
out_dir = "downloaded_models"

for i, pid in enumerate(old_projectids):
    
    # Update the status from the server 
    response = requests.get(
        f"https://swissmodel.expasy.org/project/{pid}/models/summary/", 
        headers={"Authorization": f"Token {api_key}"})

    # Update the status
    status = response.json()["status"]
    response_json  = response.json()
    print('Job status is now', status)
    print(response_json)
    for model in response_json['models']:
            print(model['coordinates_url']) # PDB Format
            # For each model, download the coordinates file in PDB format
            url = model["coordinates_url"]
            download_response = requests.get(url)
            if download_response.status_code == 200:
                # Save using model_id in filename to distinguish files
                if model['model_id'] == '01':
                    filename = os.path.join(out_dir, f"{old_sequences[i]}_{model['model_id']}_coordinates.pdb.gz")
                    with open(filename, "wb") as f:
                        f.write(download_response.content)
                    # Remove the .gz extension to get the target pdb filename
                    unp_file = filename[:-3]

                    # Decompress the .gz file to a .pdb file
                    with gzip.open(filename, 'rb') as f_in, open(unp_file, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)

                    # Read the decompressed pdb file content
                    with open(unp_file, 'r') as pdb_in:
                        pdb_content = pdb_in.read()

                    # Prepend REMARK lines containing the record id and its fasta sequence 
                    remarks = f"REMARK Record ID: {record_id}\nREMARK Sequence: {fasta_dict[record_id]}\n"
                    with open(unp_file, 'w') as pdb_out:
                        pdb_out.write(remarks + pdb_content)
                    print("Downloaded:", filename)
                else:
                    print("Failed to download:", url)

Job status is now COMPLETED
{'project_id': 'da6be8', 'status': 'COMPLETED', 'models': [{'model_id': '01', 'status': 'COMPLETED', 'gmqe': 0.9, 'qmean_global': {'avg_local_score': 0.84}, 'coordinates_url': 'https://swissmodel.expasy.org/project/da6be8/models/01.pdb.gz', 'modelcif_url': 'https://swissmodel.expasy.org/project/da6be8/models/01.cif.gz'}], 'date_created': '2025-04-09T16:38:30.976108Z', 'project_title': 'Automodelling project for 2012|D0E7M2|D0E7M2_PSEAI', 'view_url': 'https://swissmodel.expasy.org/project/da6be8/view'}
https://swissmodel.expasy.org/project/da6be8/models/01.pdb.gz
Downloaded: downloaded_models/2012|D0E7M2|D0E7M2_PSEAI_01_coordinates.pdb.gz
Job status is now COMPLETED
{'project_id': '01049c', 'status': 'COMPLETED', 'models': [{'model_id': '01', 'status': 'COMPLETED', 'gmqe': 0.9, 'qmean_global': {'avg_local_score': 0.85}, 'coordinates_url': 'https://swissmodel.expasy.org/project/01049c/models/01.pdb.gz', 'modelcif_url': 'https://swissmodel.expasy.org/project/01