In [None]:
import numpy as np
import requests, sys
from pydantic import BaseModel, Field

# Fetch the entry for accession P08183 from the EBI Proteins API as JSON.
requestURL = "https://www.ebi.ac.uk/proteins/api/proteins/P08183"

# A timeout keeps this cell from hanging forever if the server stops responding.
r = requests.get(requestURL, headers={"Accept": "application/json"}, timeout=30)

# raise_for_status() does nothing on success and raises requests.HTTPError on a
# 4xx/5xx response, so it replaces the former `if not r.ok:` guard; the
# sys.exit() that followed it could never be reached.
r.raise_for_status()

responseBody = r.text    # raw response text
uniprot_dict = r.json()  # response body decoded from JSON

In [None]:
import yaml
# Write the decoded entry to disk as YAML; the context manager guarantees the
# file handle is flushed and closed even if serialisation fails.
with open('P08183.yaml', 'w') as yaml_file:
    yaml.safe_dump(uniprot_dict, yaml_file)

In [None]:
# Quick check on the raw response text: the cell displays True when the
# substring 'pdb' occurs in it. The match is case-sensitive (lowercase only).
'pdb' in r.text

In [None]:
def request_uniprot(uniprot_id, timeout=30):
    """
    Request a protein entry from the EBI Proteins API.

    Parameters
    ----------
    uniprot_id : str
        Accession to fetch; it is inserted verbatim into the request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout the call could block indefinitely.

    Returns
    -------
    The response body decoded from JSON.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    requestURL = f"https://www.ebi.ac.uk/proteins/api/proteins/{uniprot_id}"
    r = requests.get(requestURL, headers={"Accept": "application/json"}, timeout=timeout)
    # raise_for_status() is a no-op on success and raises on any error status,
    # so no separate `r.ok` check or sys.exit() fallback is needed.
    r.raise_for_status()
    return r.json()

In [None]:
def parse_resolved_chains(chain_str):
    """
    Parse the resolved chains of a structure from a UniProt chain string.

    The string holds one or more comma-separated segments, each written as
    ``<chain ids joined by '/'>=<start>-<end>``, for example ``"A/B=1-1280"``
    or ``"A=1-100, B/C=50-150"``.

    Parameters
    ----------
    chain_str : str
        The chain description to parse. An empty string yields an empty list.

    Returns
    -------
    list[ResolvedChain]
        One entry per chain id of every segment, in the order they appear.

    Raises
    ------
    ValueError
        If a segment is not of the form ``ids=start-end`` with integer bounds.
    """
    resolved_chains = []
    # Each comma-separated segment carries its own residue range.
    for segment in chain_str.split(','):
        segment = segment.strip()
        if not segment:
            continue
        chain_ids, separator, residue_range = segment.partition('=')
        bounds = residue_range.split('-')
        if not separator or len(bounds) != 2:
            raise ValueError(f"Cannot parse chain segment {segment!r} in {chain_str!r}")
        start, end = int(bounds[0]), int(bounds[1])
        # Chains sharing a range are listed together, separated by '/'.
        for chain in chain_ids.split('/'):
            resolved_chains.append(ResolvedChain(chain_id=chain.strip(), start=start, end=end))
    return resolved_chains

In [None]:
def parse_uniprot_accession(uniprot_dict):
    """
    Build a Target from a decoded UniProt API response.

    Every ``dbReferences`` entry of type ``'PDB'`` becomes an
    ExperimentalStructure and every entry of type ``'AlphaFoldDB'`` becomes a
    PredictedStructure; experimental structures are listed first.

    Parameters
    ----------
    uniprot_dict : dict
        Decoded entry. Must provide ``'accession'`` and
        ``['sequence']['sequence']``; ``'dbReferences'`` is optional.

    Returns
    -------
    Target
        The accession, sequence and collected structure entries.
    """
    accession = uniprot_dict['accession']
    # When the accession is a plain string, indexing it with [0] would keep only
    # its first character. A list of accessions is still accepted (first wins).
    if not isinstance(accession, str):
        accession = accession[0]

    db_references = uniprot_dict.get('dbReferences', [])
    pdb_ids = [ref for ref in db_references if ref['type'] == 'PDB']
    af_ids = [ref for ref in db_references if ref['type'] == 'AlphaFoldDB']

    refs = []
    for ref in pdb_ids:
        properties = ref.get('properties', {})
        chains = properties.get('chains', '')
        refs.append(ExperimentalStructure(
            uniprot_id=accession,
            pdb_id=ref['id'],
            method=properties['method'],
            # Keep only the number from values such as "3.40 A". Entries without
            # a resolution (presumably e.g. NMR structures) get an empty string.
            resolution=properties.get('resolution', '').split(' ')[0],
            resolved_chains=parse_resolved_chains(chains) if chains else [],
        ))
    for ref in af_ids:
        refs.append(PredictedStructure(
            uniprot_id=accession,
            af_id=ref['id'],
        ))
    return Target(
        uniprot_id=accession,
        sequence=uniprot_dict['sequence']['sequence'],
        structures=refs,
    )

In [None]:
class StructureEntry(BaseModel):
    """
    Base model for any structure record; it carries only the UniProt ID.
    """
    uniprot_id: str = Field(
        default=...,
        title="The UniProt ID of the protein",
    )

In [None]:
class ResolvedChain(BaseModel):
    """
    A class to represent a resolved chain

    ``start`` and ``end`` are treated as the first and last resolved positions
    of the chain, both inclusive.
    """
    chain_id: str = Field(..., title="The chain ID")
    start: int = Field(..., title="The start position of the chain")
    end: int = Field(..., title="The end position of the chain")

    @property
    def length(self) -> int:
        """Number of positions from ``start`` to ``end``, counting both ends."""
        # The range is inclusive, so e.g. 1-10 spans ten positions, not nine.
        return self.end - self.start + 1

In [None]:
class ExperimentalStructure(StructureEntry):
    """
    A class to represent an experimental structure entry
    """
    pdb_id: str = Field(..., title="The PDB ID of the structure")
    method: str = Field(..., title="The method used to determine the structure")
    resolution: str = Field(..., title="The resolution (å) of the structure")
    resolved_chains: list[ResolvedChain] = Field(..., title="A list of resolved chains")

    @property
    def sequence_coverage(self) -> int:
        """
        Number of distinct sequence positions covered by the resolved chains.

        Chain ranges are treated as inclusive and positions shared by several
        chains (e.g. identical copies ``A`` and ``B``) are counted once, so the
        result never exceeds the span of the union of all ranges.
        """
        covered = 0
        last_counted = None  # highest position already included in `covered`
        # Sweep the ranges in ascending order, counting only the part of each
        # range that lies beyond what has already been counted.
        for start, end in sorted((c.start, c.end) for c in self.resolved_chains):
            if last_counted is not None and start <= last_counted:
                start = last_counted + 1
            if end >= start:
                covered += end - start + 1
                last_counted = end
        return covered

In [None]:
class PredictedStructure(StructureEntry):
    """
    Structure entry for a predicted model, identified by its AlphaFold DB ID.
    """
    af_id: str = Field(
        default=...,
        title="The AlphaFold DB ID of the structure",
    )

In [None]:
class Target(BaseModel):
    """
    A class to represent a protein target

    Holds the protein's UniProt ID, its sequence, and the structure entries
    (experimental and predicted) associated with it.
    """
    uniprot_id: str = Field(..., title="The UniProt ID of the protein")
    sequence: str = Field(..., title="The protein sequence")
    structures: list[StructureEntry] = Field(..., title="A list of structure entries")

    @property
    def sequence_length(self) -> int:
        """Length of the protein sequence."""
        return len(self.sequence)

    @property
    def experimental_structures(self) -> "list[ExperimentalStructure]":
        """The entries of ``structures`` that are ExperimentalStructure instances."""
        return [s for s in self.structures if isinstance(s, ExperimentalStructure)]

    @property
    def average_coverage(self) -> float:
        """
        Mean over experimental structures of ``sequence_coverage / sequence_length``.

        Returns 0.0 when there are no experimental structures or the sequence
        is empty, instead of producing NaN or dividing by zero.
        """
        coverages = [s.sequence_coverage for s in self.experimental_structures]
        if not coverages or self.sequence_length == 0:
            return 0.0
        return float(np.mean(np.array(coverages) / self.sequence_length))

    @property
    def n_experimental_structures(self) -> int:
        """Number of experimental structure entries."""
        return len(self.experimental_structures)

    @property
    def n_predicted_structures(self) -> int:
        """Number of predicted structure entries."""
        return len([s for s in self.structures if isinstance(s, PredictedStructure)])

In [None]:
# Turn the decoded API response into a Target (accession, sequence, structures).
target = parse_uniprot_accession(uniprot_dict)

In [None]:
# Display the mean, over experimental structures, of coverage / sequence length.
target.average_coverage