In [12]:
#import sys, os
from urllib.parse import urljoin
import requests
import json
import pprint
import numpy as np
import pandas as pd

In [13]:
def make_pfam_request(accID):
    """Make a request to pfam (within InterPro) with the accession ID."""
    url = urljoin("https://www.ebi.ac.uk/interpro/api/", "entry/pfam/", accID)
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Error: {}".format(response.status_code))
        return None

In [14]:
def make_InterPro_request(accID):
    """Get InterPro entry JSON with accession ID."""
    url = urljoin("https://www.ebi.ac.uk/interpro/api/", "entry/", accID)
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Error: {}".format(response.status_code))
        return None

In [15]:
# Get the PFAM entry
PFAM_ID = "PF00067"
pfam_meta = make_pfam_request(PFAM_ID)
print(json.dumps(pfam_meta, indent=2))

{
  "count": 19632,
  "next": "https://www.ebi.ac.uk/interpro/api/entry/pfam/?cursor=cD1QRjAwMDIw",
  "previous": null,
  "results": [
    {
      "metadata": {
        "accession": "PF00001",
        "name": "7 transmembrane receptor (rhodopsin family)",
        "source_database": "pfam",
        "type": "family",
        "integrated": "IPR000276",
        "member_databases": null,
        "go_terms": null
      }
    },
    {
      "metadata": {
        "accession": "PF00002",
        "name": "7 transmembrane receptor (Secretin family)",
        "source_database": "pfam",
        "type": "family",
        "integrated": "IPR000832",
        "member_databases": null,
        "go_terms": null
      }
    },
    {
      "metadata": {
        "accession": "PF00003",
        "name": "7 transmembrane sweet-taste receptor of 3 GCPR",
        "source_database": "pfam",
        "type": "domain",
        "integrated": "IPR017978",
        "member_databases": null,
        "go_terms": null
     

In [16]:
ex_prot_ID_pfam = pfam_meta["results"][0]["metadata"]["accession"]
ex_prot_meta = make_pfam_request(ex_prot_ID_pfam)
print(json.dumps(ex_prot_meta, indent=2))

{
  "count": 19632,
  "next": "https://www.ebi.ac.uk/interpro/api/entry/pfam/?cursor=cD1QRjAwMDIw",
  "previous": null,
  "results": [
    {
      "metadata": {
        "accession": "PF00001",
        "name": "7 transmembrane receptor (rhodopsin family)",
        "source_database": "pfam",
        "type": "family",
        "integrated": "IPR000276",
        "member_databases": null,
        "go_terms": null
      }
    },
    {
      "metadata": {
        "accession": "PF00002",
        "name": "7 transmembrane receptor (Secretin family)",
        "source_database": "pfam",
        "type": "family",
        "integrated": "IPR000832",
        "member_databases": null,
        "go_terms": null
      }
    },
    {
      "metadata": {
        "accession": "PF00003",
        "name": "7 transmembrane sweet-taste receptor of 3 GCPR",
        "source_database": "pfam",
        "type": "domain",
        "integrated": "IPR017978",
        "member_databases": null,
        "go_terms": null
     

In [17]:
ex_prot_ID_interpro = ex_prot_meta["results"][0]["metadata"]["integrated"]
ex_prot_meta = make_InterPro_request(ex_prot_ID_interpro)
print(json.dumps(ex_prot_meta, indent=2))

{
  "entries": {
    "member_databases": {
      "pfam": 19417,
      "cathgene3d": 6566,
      "ssf": 2019,
      "panther": 15840,
      "cdd": 18874,
      "profile": 1336,
      "smart": 1312,
      "tigrfams": 4444,
      "prosite": 1311,
      "prints": 2106,
      "hamap": 2383,
      "pirsf": 3292,
      "sfld": 303
    },
    "integrated": 51013,
    "unintegrated": 28432,
    "interpro": 38518,
    "all": 119259
  }
}


In [22]:
# Get list of (family) member proteins
P450_INTERPRO_ID = "IPR001128"
url = urljoin("https://www.ebi.ac.uk/interpro/api/protein/UniProt/entry/InterPro/", P450_INTERPRO_ID)
response = requests.get(url)
if response.status_code == 200:
    interpro_meta = response.json()
    print(json.dumps(interpro_meta, indent=2))

200
{
  "count": 455629,
  "next": "https://www.ebi.ac.uk/interpro/api/protein/UniProt/entry/InterPro/IPR001128/?cursor=source%3As%3Aa0a010qdi7",
  "previous": null,
  "results": [
    {
      "metadata": {
        "accession": "A0A010PZX0",
        "name": "Trichothecene C-8 hydroxylase",
        "source_database": "unreviewed",
        "length": 517,
        "source_organism": {
          "taxId": "1445577",
          "scientificName": "Colletotrichum fioriniae PJ7",
          "fullName": "Colletotrichum fioriniae PJ7"
        }
      },
      "entries": [
        {
          "accession": "IPR001128",
          "entry_protein_locations": [
            {
              "fragments": [
                {
                  "start": 227,
                  "end": 483,
                  "dc-status": "CONTINUOUS"
                }
              ],
              "model": null,
              "score": null
            }
          ],
          "protein_length": 517,
          "source_database": "i

In [23]:
# Get the sequence for each protein
curID = interpro_meta["results"][0]["metadata"]["accession"]
url = urljoin("https://www.ebi.ac.uk/interpro/api/protein/UniProt/", curID)
response = requests.get(url)
if response.status_code == 200:
    prot_data = response.json()
    print(json.dumps(prot_data, indent=2))

{
  "metadata": {
    "accession": "A0A010PZX0",
    "id": "A0A010PZX0_9PEZI",
    "source_organism": {
      "taxId": "1445577",
      "scientificName": "Colletotrichum fioriniae PJ7",
      "fullName": "Colletotrichum fioriniae PJ7"
    },
    "name": "Trichothecene C-8 hydroxylase",
    "description": null,
    "length": 517,
    "sequence": "MSLSWSGWPATLEPQQSALGLLFVVLSLTLAYISIPGRHDHLPYINRPPKWDFLGQKTKQHFVSNARSLMANAREAFKGKPYRMFTDLGDLIVIPAHHADEMRNERSLNFLDAFVDNFHPNIPGFDGFAFDGRKDELLHKTINKKITKMLNEITAPLSLEADFATRLILGTSTEWREIPLQEALLNLVARLSSRVFLGDELCRNDAWIKITGSYSVNTFSAAEILRQYPSYLRHIACYFIPQCRLLKEQVAEARRVLNPVLEKREWEKKMALSEGRTEPSYKDAIQWVMEESQGSPFDPVGAQLGLSIVAIHTTTDLATETMLRLMVRPKLMEDVRAEIVAVLRKEGWTKSALFNMKLLDSVIKEAQRLKPTTSATMNRKATRQVKLPGGLVLEKGDRCMADLGSMVDPNVYPNPLEFDGYRFFRMRGDPKMDSKAHLVSTSVAHMGFGHGLHACPGRFFASNEVKVLLCHLVQKYDWKLDSAFEHTIHEFGLSLSSGSTKAFVARRKNTEIDIDAL",
    "proteome": "UP000020467",
    "gene": "CFIO01_07482",
    "go_terms": [
      {
        "identifier": "GO:0004497",
        "na