# Find all PDBs associated with UniProt AC.

Conny Yu, 08/2024<br><br>
Using UniProt API, find all PDBs associated to the UniProt entry.<br>
Using PDB API, find all references (including preprints) and their PMIDs associated to each PDB.<br>
Input: UniProt AC.

In [7]:
import requests
from collections import defaultdict

def get_pdb_codes(uniprot_ac):
    # Query UniProt API to get the PDB information
    uniprot_url = f"https://www.uniprot.org/uniprot/{uniprot_ac}.xml"
    response = requests.get(uniprot_url)
    
    if response.status_code != 200:
        print(f"\033[1mError fetching data from UniProt for AC {uniprot_ac}\033[0m")
        return []
    
    xml_data = response.text
    
    # Extract the PDB codes from the XML data
    pdb_codes = set()
    for line in xml_data.splitlines():
        if 'dbReference type="PDB"' in line:
            start = line.find('id="') + 4
            end = line.find('"', start)
            pdb_code = line[start:end]
            pdb_codes.add(pdb_code)
    
    return list(pdb_codes)

def get_pdb_pmids(pdb_code):
    # Query RCSB PDB API to get references for the PDB code
    pdb_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_code}"
    response = requests.get(pdb_url)
    
    if response.status_code != 200:
        print(f"\033[1mError fetching data from RCSB PDB for PDB code {pdb_code}\033[0m")
        return []
    
    pdb_data = response.json()
    
    # Extract PMIDs from the PDB data
    pmids = set()
    if 'rcsb_primary_citation' in pdb_data:
        primary_citation = pdb_data['rcsb_primary_citation']
        if 'pdbx_database_id_pub_med' in primary_citation:
            pmids.add(primary_citation['pdbx_database_id_pub_med'])
    
    if 'citation' in pdb_data:
        for citation in pdb_data['citation']:
            if 'pdbx_database_id_pub_med' in citation:
                pmids.add(citation['pdbx_database_id_pub_med'])
    
    return list(pmids)

def main():
    # Prompt the user for the UniProt AC number
    uniprot_ac = input("Enter UniProt AC: ").strip()
    
    # Get the list of PDB codes associated with the given UniProt AC number
    pdb_codes = get_pdb_codes(uniprot_ac)
    
    pmid_to_pdb = defaultdict(list)
    all_pmids = set()
    
    if pdb_codes:
        for pdb_code in pdb_codes:
            pmids = get_pdb_pmids(pdb_code)
            for pmid in pmids:
                pmid_to_pdb[pmid].append(pdb_code)
                all_pmids.add(pmid)
        
        print(f"\033[1mPMIDs and associated PDB codes for UniProt AC {uniprot_ac}:\033[0m")
        for pmid, pdb_list in pmid_to_pdb.items():
            print(f"PMID: {pmid}")
            print(f"  Associated PDBs: {', '.join(pdb_list)}")

        # All associated PDBs
        print("\n\033[1mAll associated PDBs:\033[0m")
        print(", ".join(str(pdb_code).lower() for pdb_code in pdb_codes))
        
        # Output for Asterix GM
        print("\n\033[1mAll associated references:\033[0m")
        print(", ".join(str(pmid) for pmid in all_pmids))
        
        # Output for PyMOL command
        print("\n\033[1mTo use in Pymol command:\033[0m")
        print("fetch " + "; fetch ".join(str(pdb_code).lower() for pdb_code in pdb_codes))
    else:
        print(f"No PDB codes found for UniProt AC {uniprot_ac}.")

if __name__ == "__main__":
    main()


Enter UniProt AC:  Q9UQ13


[1mPMIDs and associated PDB codes for UniProt AC Q9UQ13:[0m
PMID: 35831509
  Associated PDBs: 7UPI, 7T7A
PMID: 36175670
  Associated PDBs: 7TVG, 7TVF
PMID: 35830882
  Associated PDBs: 7TYG, 7TXH
PMID: 35768504
  Associated PDBs: 7SD1, 7SD0

[1mAll associated PDBs:[0m
7upi, 7tvg, 7t7a, 7tyg, 7tvf, 7sd1, 7txh, 7sd0

[1mAll associated references:[0m
35768504, 35830882, 35831509, 36175670

[1mTo use in Pymol command:[0m
fetch 7upi; fetch 7tvg; fetch 7t7a; fetch 7tyg; fetch 7tvf; fetch 7sd1; fetch 7txh; fetch 7sd0
